Diffstat (limited to 'block/blk-lib.c')
 block/blk-lib.c | 482 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 276 insertions(+), 206 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index d6f50d572565..9e2cc58f881f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to generic helper functions
*/
@@ -9,267 +10,288 @@
#include "blk.h"
-struct bio_batch {
- atomic_t done;
- unsigned long flags;
- struct completion *wait;
-};
+static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
+{
+ unsigned int discard_granularity = bdev_discard_granularity(bdev);
+ sector_t granularity_aligned_sector;
+
+ if (bdev_is_partition(bdev))
+ sector += bdev->bd_start_sect;
+
+ granularity_aligned_sector =
+ round_up(sector, discard_granularity >> SECTOR_SHIFT);
+
+ /*
+	 * Make sure subsequent bios start aligned to the discard granularity
+	 * if the request needs to be split.
+ */
+ if (granularity_aligned_sector != sector)
+ return granularity_aligned_sector - sector;
+
+ /*
+ * Align the bio size to the discard granularity to make splitting the bio
+ * at discard granularity boundaries easier in the driver if needed.
+ */
+ return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
+}
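
For intuition, here is a hedged, userspace-only sketch of the arithmetic above with invented values: SECTOR_SHIFT is 9, so a 4 KiB discard granularity is 8 sectors. The round_up/round_down macros below are simplified stand-ins for the kernel's, and none of this is kernel code.

/* Hedged sketch (plain C): the bio_discard_limit() math, invented values. */
#include <stdio.h>

#define SECTOR_SHIFT 9U

/* simplified stand-ins for the kernel macros (y must be non-zero) */
#define round_up(x, y)   ((((x) + (y) - 1) / (y)) * (y))
#define round_down(x, y) (((x) / (y)) * (y))

int main(void)
{
	unsigned long long sector = 10;		/* misaligned start */
	unsigned long long granularity = 4096;	/* bytes: 8 sectors */
	unsigned long long g_sects = granularity >> SECTOR_SHIFT;
	unsigned long long aligned = round_up(sector, g_sects);

	/* misaligned: trim the first bio so it ends on a boundary */
	if (aligned != sector)
		printf("first bio: %llu sectors\n", aligned - sector);	/* 6 */

	/* aligned: largest granularity-aligned size that fits in bi_size */
	printf("later bios: %llu sectors\n",
	       round_down(0xffffffffULL, granularity) >> SECTOR_SHIFT);	/* 8388600 */
	return 0;
}
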
-static void bio_batch_end_io(struct bio *bio, int err)
+struct bio *blk_alloc_discard_bio(struct block_device *bdev,
+ sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask)
{
- struct bio_batch *bb = bio->bi_private;
+ sector_t bio_sects = min(*nr_sects, bio_discard_limit(bdev, *sector));
+ struct bio *bio;
+
+ if (!bio_sects)
+ return NULL;
- if (err && (err != -EOPNOTSUPP))
- clear_bit(BIO_UPTODATE, &bb->flags);
- if (atomic_dec_and_test(&bb->done))
- complete(bb->wait);
- bio_put(bio);
+ bio = bio_alloc(bdev, 0, REQ_OP_DISCARD, gfp_mask);
+ if (!bio)
+ return NULL;
+ bio->bi_iter.bi_sector = *sector;
+ bio->bi_iter.bi_size = bio_sects << SECTOR_SHIFT;
+ *sector += bio_sects;
+ *nr_sects -= bio_sects;
+ /*
+	 * We can loop for a long time in here if someone does a full-device
+	 * discard (like mkfs). Be nice and allow us to schedule out to avoid
+	 * triggering a soft lockup if preemption is disabled.
+ */
+ cond_resched();
+ return bio;
}
+int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
+{
+ struct bio *bio;
+
+ while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
+ gfp_mask)))
+ *biop = bio_chain_and_submit(*biop, bio);
+ return 0;
+}
+EXPORT_SYMBOL(__blkdev_issue_discard);
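
For reference, the bio_chain_and_submit() helper used above lives in block/bio.c. Quoted from memory, so treat this as a hedged sketch rather than the authoritative source: it chains the previous bio to the new one, submits the previous, and returns the new bio, so *biop always points at the unsubmitted tail of the chain.

/* Hedged sketch of bio_chain_and_submit() (block/bio.c), from memory. */
#include <linux/bio.h>

struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new)
{
	if (prev) {
		bio_chain(prev, new);	/* prev completes into new */
		submit_bio(prev);
	}
	return new;			/* caller keeps the chain tail */
}
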
+
/**
* blkdev_issue_discard - queue a discard
* @bdev: blockdev to issue discard for
* @sector: start sector
* @nr_sects: number of sectors to discard
* @gfp_mask: memory allocation flags (for bio_alloc)
- * @flags: BLKDEV_IFL_* flags to control behaviour
*
* Description:
* Issue a discard request for the sectors in question.
*/
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+ sector_t nr_sects, gfp_t gfp_mask)
{
- DECLARE_COMPLETION_ONSTACK(wait);
- struct request_queue *q = bdev_get_queue(bdev);
- int type = REQ_WRITE | REQ_DISCARD;
- sector_t max_discard_sectors;
- sector_t granularity, alignment;
- struct bio_batch bb;
- struct bio *bio;
- int ret = 0;
+ struct bio *bio = NULL;
struct blk_plug plug;
+ int ret = 0;
- if (!q)
- return -ENXIO;
-
- if (!blk_queue_discard(q))
- return -EOPNOTSUPP;
+ blk_start_plug(&plug);
+ __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio);
+ if (bio) {
+ ret = submit_bio_wait(bio);
+ if (ret == -EOPNOTSUPP)
+ ret = 0;
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
- /* Zero-sector (unknown) and one-sector granularities are the same. */
- granularity = max(q->limits.discard_granularity >> 9, 1U);
- alignment = bdev_discard_alignment(bdev) >> 9;
- alignment = sector_div(alignment, granularity);
+ return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_discard);
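
A hedged usage sketch for the new four-argument signature: callers that previously passed a flags value of 0 simply drop it, and byte ranges must be converted to 512-byte sectors first. The function name below is invented for illustration.

/* Hedged caller sketch (names invented): discard a byte range. */
#include <linux/blkdev.h>

static int example_trim_range(struct block_device *bdev, u64 start, u64 len)
{
	if ((start | len) & 511)
		return -EINVAL;		/* must be 512-byte aligned */
	return blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
				    len >> SECTOR_SHIFT, GFP_KERNEL);
}
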
- /*
- * Ensure that max_discard_sectors is of the proper
- * granularity, so that requests stay aligned after a split.
- */
- max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
- sector_div(max_discard_sectors, granularity);
- max_discard_sectors *= granularity;
- if (unlikely(!max_discard_sectors)) {
- /* Avoid infinite loop below. Being cautious never hurts. */
- return -EOPNOTSUPP;
- }
+static sector_t bio_write_zeroes_limit(struct block_device *bdev)
+{
+ sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if (flags & BLKDEV_DISCARD_SECURE) {
- if (!blk_queue_secdiscard(q))
- return -EOPNOTSUPP;
- type |= REQ_SECURE;
- }
+ return min(bdev_write_zeroes_sectors(bdev),
+ (UINT_MAX >> SECTOR_SHIFT) & ~bs_mask);
+}
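
A hedged numeric check of the cap above, in plain C: with 4096-byte logical blocks the mask is 7, and the resulting per-bio maximum both fits in a 32-bit bi_size and stays logical-block aligned.

/* Hedged check (plain C): the UINT_MAX-based write-zeroes cap. */
#include <assert.h>
#include <limits.h>

int main(void)
{
	unsigned long long lbs = 4096;			/* logical block size */
	unsigned long long bs_mask = (lbs >> 9) - 1;	/* == 7 */
	unsigned long long cap = (UINT_MAX >> 9) & ~bs_mask;

	assert(cap == 8388600ULL);		/* sectors per bio */
	assert((cap << 9) <= UINT_MAX);		/* fits in bi_size */
	assert((cap & bs_mask) == 0);		/* logical-block aligned */
	return 0;
}
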
- atomic_set(&bb.done, 1);
- bb.flags = 1 << BIO_UPTODATE;
- bb.wait = &wait;
+/*
+ * There is no reliable way for the SCSI subsystem to determine whether a
+ * device supports a WRITE SAME operation without actually performing a write
+ * to media. As a result, write_zeroes is enabled by default and will be
+ * disabled if a zeroing operation subsequently fails. This means that this
+ * queue limit is likely to change at runtime.
+ */
+static void __blkdev_issue_write_zeroes(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+ struct bio **biop, unsigned flags, sector_t limit)
+{
- blk_start_plug(&plug);
while (nr_sects) {
- unsigned int req_sects;
- sector_t end_sect, tmp;
+ unsigned int len = min(nr_sects, limit);
+ struct bio *bio;
- bio = bio_alloc(gfp_mask, 1);
- if (!bio) {
- ret = -ENOMEM;
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current))
break;
- }
- req_sects = min_t(sector_t, nr_sects, max_discard_sectors);
-
- /*
- * If splitting a request, and the next starting sector would be
- * misaligned, stop the discard at the previous aligned sector.
- */
- end_sect = sector + req_sects;
- tmp = end_sect;
- if (req_sects < nr_sects &&
- sector_div(tmp, granularity) != alignment) {
- end_sect = end_sect - alignment;
- sector_div(end_sect, granularity);
- end_sect = end_sect * granularity + alignment;
- req_sects = end_sect - sector;
- }
-
- bio->bi_sector = sector;
- bio->bi_end_io = bio_batch_end_io;
- bio->bi_bdev = bdev;
- bio->bi_private = &bb;
+ bio = bio_alloc(bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
+ bio->bi_iter.bi_sector = sector;
+ if (flags & BLKDEV_ZERO_NOUNMAP)
+ bio->bi_opf |= REQ_NOUNMAP;
- bio->bi_size = req_sects << 9;
- nr_sects -= req_sects;
- sector = end_sect;
+ bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+ *biop = bio_chain_and_submit(*biop, bio);
- atomic_inc(&bb.done);
- submit_bio(type, bio);
+ nr_sects -= len;
+ sector += len;
+ cond_resched();
}
- blk_finish_plug(&plug);
+}
- /* Wait for bios in-flight */
- if (!atomic_dec_and_test(&bb.done))
- wait_for_completion_io(&wait);
+static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp, unsigned flags)
+{
+ sector_t limit = bio_write_zeroes_limit(bdev);
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ int ret = 0;
- if (!test_bit(BIO_UPTODATE, &bb.flags))
- ret = -EIO;
+ blk_start_plug(&plug);
+ __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio,
+ flags, limit);
+ if (bio) {
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current)) {
+ bio_await_chain(bio);
+ blk_finish_plug(&plug);
+ return -EINTR;
+ }
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
+ /*
+ * For some devices there is no non-destructive way to verify whether
+ * WRITE ZEROES is actually supported. These will clear the capability
+ * on an I/O error, in which case we'll turn any error into
+ * "not supported" here.
+ */
+ if (ret && !bdev_write_zeroes_sectors(bdev))
+ return -EOPNOTSUPP;
return ret;
}
-EXPORT_SYMBOL(blkdev_issue_discard);
-/**
- * blkdev_issue_write_same - queue a write same operation
- * @bdev: target blockdev
- * @sector: start sector
- * @nr_sects: number of sectors to write
- * @gfp_mask: memory allocation flags (for bio_alloc)
- * @page: page containing data to write
- *
- * Description:
- * Issue a write same request for the sectors in question.
+/*
+ * Convert a number of 512B sectors to a number of pages.
+ * The result is limited to a number of pages that can fit into a BIO.
+ * Also make sure that the result is always at least 1 (page) for the cases
+ * where nr_sects is lower than the number of sectors in a page.
*/
-int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask,
- struct page *page)
+static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
{
- DECLARE_COMPLETION_ONSTACK(wait);
- struct request_queue *q = bdev_get_queue(bdev);
- unsigned int max_write_same_sectors;
- struct bio_batch bb;
- struct bio *bio;
- int ret = 0;
-
- if (!q)
- return -ENXIO;
+ sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);
- max_write_same_sectors = q->limits.max_write_same_sectors;
-
- if (max_write_same_sectors == 0)
- return -EOPNOTSUPP;
+ return min(pages, (sector_t)BIO_MAX_VECS);
+}
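
A hedged sketch of the helper's behaviour in plain C, assuming the common 4 KiB PAGE_SIZE and a BIO_MAX_VECS of 256 (both are configuration-dependent values, assumed here for illustration):

/* Hedged sketch (plain C): __blkdev_sectors_to_bio_pages() behaviour. */
#include <assert.h>

#define PAGE_SIZE	4096ULL		/* assumed */
#define BIO_MAX_VECS	256ULL		/* assumed */

static unsigned long long sectors_to_bio_pages(unsigned long long nr_sects)
{
	/* divide by sectors-per-page, rounding up */
	unsigned long long pages =
		(nr_sects + (PAGE_SIZE / 512) - 1) / (PAGE_SIZE / 512);

	return pages < BIO_MAX_VECS ? pages : BIO_MAX_VECS;
}

int main(void)
{
	assert(sectors_to_bio_pages(1) == 1);		/* sub-page rounds up */
	assert(sectors_to_bio_pages(8) == 1);		/* exactly one page */
	assert(sectors_to_bio_pages(1 << 20) == BIO_MAX_VECS);	/* capped */
	return 0;
}
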
- atomic_set(&bb.done, 1);
- bb.flags = 1 << BIO_UPTODATE;
- bb.wait = &wait;
+static void __blkdev_issue_zero_pages(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+ struct bio **biop, unsigned int flags)
+{
+ struct folio *zero_folio = largest_zero_folio();
while (nr_sects) {
- bio = bio_alloc(gfp_mask, 1);
- if (!bio) {
- ret = -ENOMEM;
+ unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects);
+ struct bio *bio;
+
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current))
break;
- }
- bio->bi_sector = sector;
- bio->bi_end_io = bio_batch_end_io;
- bio->bi_bdev = bdev;
- bio->bi_private = &bb;
- bio->bi_vcnt = 1;
- bio->bi_io_vec->bv_page = page;
- bio->bi_io_vec->bv_offset = 0;
- bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
-
- if (nr_sects > max_write_same_sectors) {
- bio->bi_size = max_write_same_sectors << 9;
- nr_sects -= max_write_same_sectors;
- sector += max_write_same_sectors;
- } else {
- bio->bi_size = nr_sects << 9;
- nr_sects = 0;
- }
+ bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask);
+ bio->bi_iter.bi_sector = sector;
+
+ do {
+ unsigned int len;
+
+ len = min_t(sector_t, folio_size(zero_folio),
+ nr_sects << SECTOR_SHIFT);
+ if (!bio_add_folio(bio, zero_folio, len, 0))
+ break;
+ nr_sects -= len >> SECTOR_SHIFT;
+ sector += len >> SECTOR_SHIFT;
+ } while (nr_sects);
- atomic_inc(&bb.done);
- submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
+ *biop = bio_chain_and_submit(*biop, bio);
+ cond_resched();
}
+}
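
The payoff of largest_zero_folio() is fewer bio segments: assuming it can hand back a 2 MiB huge zero folio and falls back to the single 4 KiB zero page otherwise (an assumption on our part), the per-range segment count drops sharply. A hedged back-of-envelope:

/* Hedged arithmetic (plain C): segments needed to zero 1 GiB. */
#include <stdio.h>

int main(void)
{
	unsigned long long range = 1ULL << 30;		/* 1 GiB */

	printf("4 KiB zero page : %llu bio_add_folio() calls\n",
	       range / (4ULL << 10));			/* 262144 */
	printf("2 MiB zero folio: %llu bio_add_folio() calls\n",
	       range / (2ULL << 20));			/* 512 */
	return 0;
}
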
- /* Wait for bios in-flight */
- if (!atomic_dec_and_test(&bb.done))
- wait_for_completion_io(&wait);
+static int blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp, unsigned flags)
+{
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ int ret = 0;
- if (!test_bit(BIO_UPTODATE, &bb.flags))
- ret = -ENOTSUPP;
+ if (flags & BLKDEV_ZERO_NOFALLBACK)
+ return -EOPNOTSUPP;
+
+ blk_start_plug(&plug);
+ __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp, &bio, flags);
+ if (bio) {
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current)) {
+ bio_await_chain(bio);
+ blk_finish_plug(&plug);
+ return -EINTR;
+ }
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
return ret;
}
-EXPORT_SYMBOL(blkdev_issue_write_same);
/**
- * blkdev_issue_zeroout - generate number of zero filed write bios
+ * __blkdev_issue_zeroout - generate a number of zero-filled write bios
* @bdev: blockdev to issue
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
+ * @biop: pointer to anchor bio
+ * @flags: controls detailed behavior
*
* Description:
- * Generate and issue number of bios with zerofiled pages.
+ * Zero-fill a block range, either using hardware offload or by explicitly
+ * writing zeroes to the device.
+ *
+ * If a device is using logical block provisioning, the underlying space will
+ * not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
+ *
+ * If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
+ * -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
*/
-
int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask)
+ sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
+ unsigned flags)
{
- int ret;
- struct bio *bio;
- struct bio_batch bb;
- unsigned int sz;
- DECLARE_COMPLETION_ONSTACK(wait);
-
- atomic_set(&bb.done, 1);
- bb.flags = 1 << BIO_UPTODATE;
- bb.wait = &wait;
-
- ret = 0;
- while (nr_sects != 0) {
- bio = bio_alloc(gfp_mask,
- min(nr_sects, (sector_t)BIO_MAX_PAGES));
- if (!bio) {
- ret = -ENOMEM;
- break;
- }
-
- bio->bi_sector = sector;
- bio->bi_bdev = bdev;
- bio->bi_end_io = bio_batch_end_io;
- bio->bi_private = &bb;
-
- while (nr_sects != 0) {
- sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
- ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
- nr_sects -= ret >> 9;
- sector += ret >> 9;
- if (ret < (sz << 9))
- break;
- }
- ret = 0;
- atomic_inc(&bb.done);
- submit_bio(WRITE, bio);
- }
+ sector_t limit = bio_write_zeroes_limit(bdev);
- /* Wait for bios in-flight */
- if (!atomic_dec_and_test(&bb.done))
- wait_for_completion_io(&wait);
+ if (bdev_read_only(bdev))
+ return -EPERM;
- if (!test_bit(BIO_UPTODATE, &bb.flags))
- /* One of bios in the batch was completed with error.*/
- ret = -EIO;
-
- return ret;
+ if (limit) {
+ __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+ gfp_mask, biop, flags, limit);
+ } else {
+ if (flags & BLKDEV_ZERO_NOFALLBACK)
+ return -EOPNOTSUPP;
+ __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
+ biop, flags);
+ }
+ return 0;
}
+EXPORT_SYMBOL(__blkdev_issue_zeroout);
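
A hedged sketch of the chained-caller pattern this export enables, which is essentially what the synchronous wrapper below does: build the whole chain under a plug, wait once, and reap the final bio. The function name is invented for illustration.

/* Hedged caller sketch (name invented) for __blkdev_issue_zeroout(). */
#include <linux/blkdev.h>

static int example_zero_range(struct block_device *bdev,
		sector_t sector, sector_t nr_sects)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
				     &bio, BLKDEV_ZERO_NOUNMAP);
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);	/* waits on the whole chain */
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	return ret;
}
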
/**
* blkdev_issue_zeroout - zero-fill a block range
@@ -277,25 +299,73 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
+ * @flags: controls detailed behavior
*
* Description:
- * Generate and issue number of bios with zerofiled pages.
+ * Zero-fill a block range, either using hardware offload or by explicitly
+ * writing zeroes to the device. See __blkdev_issue_zeroout() for the
+ * valid values for %flags.
*/
-
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask)
+ sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
- if (bdev_write_same(bdev)) {
- unsigned char bdn[BDEVNAME_SIZE];
+ int ret;
- if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
- ZERO_PAGE(0)))
- return 0;
+ if ((sector | nr_sects) & ((bdev_logical_block_size(bdev) >> 9) - 1))
+ return -EINVAL;
+ if (bdev_read_only(bdev))
+ return -EPERM;
- bdevname(bdev, bdn);
- pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
+ if (bdev_write_zeroes_sectors(bdev)) {
+ ret = blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+ gfp_mask, flags);
+ if (ret != -EOPNOTSUPP)
+ return ret;
}
- return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
+ return blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, flags);
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
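
A hedged sketch of a BLKZEROOUT-style caller of the wrapper above; passing flags of 0 permits both unmapping and the explicit page-writing fallback. The wrapper validates alignment itself (returning -EINVAL), so the caller only converts bytes to sectors. Identifiers other than the exported API are invented.

/* Hedged caller sketch (names invented): zero a byte range. */
#include <linux/blkdev.h>

static int example_zeroout(struct block_device *bdev, u64 start, u64 len)
{
	return blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
				    len >> SECTOR_SHIFT, GFP_KERNEL, 0);
}
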
+
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp)
+{
+ sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+ unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev);
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ int ret = 0;
+
+ /* make sure that "len << SECTOR_SHIFT" doesn't overflow */
+ if (max_sectors > UINT_MAX >> SECTOR_SHIFT)
+ max_sectors = UINT_MAX >> SECTOR_SHIFT;
+ max_sectors &= ~bs_mask;
+
+ if (max_sectors == 0)
+ return -EOPNOTSUPP;
+ if ((sector | nr_sects) & bs_mask)
+ return -EINVAL;
+ if (bdev_read_only(bdev))
+ return -EPERM;
+
+ blk_start_plug(&plug);
+ while (nr_sects) {
+ unsigned int len = min_t(sector_t, nr_sects, max_sectors);
+
+ bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+
+ sector += len;
+ nr_sects -= len;
+ cond_resched();
+ }
+ if (bio) {
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
+
+ return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_secure_erase);
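
And a hedged sketch of a BLKSECDISCARD-style caller for the new export; the helper itself rejects misaligned ranges, read-only devices, and devices without secure-erase support, so the caller again only converts bytes to sectors (names invented):

/* Hedged caller sketch (names invented): secure-erase a byte range. */
#include <linux/blkdev.h>

static int example_secure_erase(struct block_device *bdev, u64 start, u64 len)
{
	return blkdev_issue_secure_erase(bdev, start >> SECTOR_SHIFT,
					 len >> SECTOR_SHIFT, GFP_KERNEL);
}
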