Diffstat (limited to 'block/blk-lib.c')
-rw-r--r--	block/blk-lib.c | 482
1 file changed, 276 insertions(+), 206 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index d6f50d572565..9e2cc58f881f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Functions related to generic helpers functions
  */
@@ -9,267 +10,288 @@
 #include "blk.h"
 
-struct bio_batch {
-	atomic_t done;
-	unsigned long flags;
-	struct completion *wait;
-};
+static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
+{
+	unsigned int discard_granularity = bdev_discard_granularity(bdev);
+	sector_t granularity_aligned_sector;
+
+	if (bdev_is_partition(bdev))
+		sector += bdev->bd_start_sect;
+
+	granularity_aligned_sector =
+		round_up(sector, discard_granularity >> SECTOR_SHIFT);
+
+	/*
+	 * Make sure subsequent bios start aligned to the discard granularity if
+	 * it needs to be split.
+	 */
+	if (granularity_aligned_sector != sector)
+		return granularity_aligned_sector - sector;
+
+	/*
+	 * Align the bio size to the discard granularity to make splitting the bio
+	 * at discard granularity boundaries easier in the driver if needed.
+	 */
+	return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
+}
 
-static void bio_batch_end_io(struct bio *bio, int err)
+struct bio *blk_alloc_discard_bio(struct block_device *bdev,
+		sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask)
 {
-	struct bio_batch *bb = bio->bi_private;
+	sector_t bio_sects = min(*nr_sects, bio_discard_limit(bdev, *sector));
+	struct bio *bio;
+
+	if (!bio_sects)
+		return NULL;
 
-	if (err && (err != -EOPNOTSUPP))
-		clear_bit(BIO_UPTODATE, &bb->flags);
-	if (atomic_dec_and_test(&bb->done))
-		complete(bb->wait);
-	bio_put(bio);
+	bio = bio_alloc(bdev, 0, REQ_OP_DISCARD, gfp_mask);
+	if (!bio)
+		return NULL;
+	bio->bi_iter.bi_sector = *sector;
+	bio->bi_iter.bi_size = bio_sects << SECTOR_SHIFT;
+	*sector += bio_sects;
+	*nr_sects -= bio_sects;
+	/*
+	 * We can loop for a long time in here if someone does full device
+	 * discards (like mkfs). Be nice and allow us to schedule out to avoid
+	 * softlocking if preempt is disabled.
+	 */
+	cond_resched();
+	return bio;
 }
 
+int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
+{
+	struct bio *bio;
+
+	while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
+			gfp_mask)))
+		*biop = bio_chain_and_submit(*biop, bio);
+	return 0;
+}
+EXPORT_SYMBOL(__blkdev_issue_discard);
+
 /**
  * blkdev_issue_discard - queue a discard
  * @bdev:	blockdev to issue discard for
  * @sector:	start sector
  * @nr_sects:	number of sectors to discard
  * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @flags:	BLKDEV_IFL_* flags to control behaviour
  *
  * Description:
  *    Issue a discard request for the sectors in question.
  */
 int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+		sector_t nr_sects, gfp_t gfp_mask)
 {
-	DECLARE_COMPLETION_ONSTACK(wait);
-	struct request_queue *q = bdev_get_queue(bdev);
-	int type = REQ_WRITE | REQ_DISCARD;
-	sector_t max_discard_sectors;
-	sector_t granularity, alignment;
-	struct bio_batch bb;
-	struct bio *bio;
-	int ret = 0;
+	struct bio *bio = NULL;
 	struct blk_plug plug;
+	int ret = 0;
 
-	if (!q)
-		return -ENXIO;
-
-	if (!blk_queue_discard(q))
-		return -EOPNOTSUPP;
+	blk_start_plug(&plug);
+	__blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio);
+	if (bio) {
+		ret = submit_bio_wait(bio);
+		if (ret == -EOPNOTSUPP)
+			ret = 0;
+		bio_put(bio);
+	}
+	blk_finish_plug(&plug);
 
-	/* Zero-sector (unknown) and one-sector granularities are the same. */
-	granularity = max(q->limits.discard_granularity >> 9, 1U);
-	alignment = bdev_discard_alignment(bdev) >> 9;
-	alignment = sector_div(alignment, granularity);
+	return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_discard);
 
-	/*
-	 * Ensure that max_discard_sectors is of the proper
-	 * granularity, so that requests stay aligned after a split.
-	 */
-	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
-	sector_div(max_discard_sectors, granularity);
-	max_discard_sectors *= granularity;
-	if (unlikely(!max_discard_sectors)) {
-		/* Avoid infinite loop below. Being cautious never hurts. */
-		return -EOPNOTSUPP;
-	}
+static sector_t bio_write_zeroes_limit(struct block_device *bdev)
+{
+	sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
 
-	if (flags & BLKDEV_DISCARD_SECURE) {
-		if (!blk_queue_secdiscard(q))
-			return -EOPNOTSUPP;
-		type |= REQ_SECURE;
-	}
+	return min(bdev_write_zeroes_sectors(bdev),
+		(UINT_MAX >> SECTOR_SHIFT) & ~bs_mask);
+}
 
-	atomic_set(&bb.done, 1);
-	bb.flags = 1 << BIO_UPTODATE;
-	bb.wait = &wait;
+/*
+ * There is no reliable way for the SCSI subsystem to determine whether a
+ * device supports a WRITE SAME operation without actually performing a write
+ * to media. As a result, write_zeroes is enabled by default and will be
+ * disabled if a zeroing operation subsequently fails. This means that this
+ * queue limit is likely to change at runtime.
+ */
+static void __blkdev_issue_write_zeroes(struct block_device *bdev,
+		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+		struct bio **biop, unsigned flags, sector_t limit)
+{
 
-	blk_start_plug(&plug);
 	while (nr_sects) {
-		unsigned int req_sects;
-		sector_t end_sect, tmp;
+		unsigned int len = min(nr_sects, limit);
+		struct bio *bio;
 
-		bio = bio_alloc(gfp_mask, 1);
-		if (!bio) {
-			ret = -ENOMEM;
+		if ((flags & BLKDEV_ZERO_KILLABLE) &&
+		    fatal_signal_pending(current))
 			break;
-		}
 
-		req_sects = min_t(sector_t, nr_sects, max_discard_sectors);
-
-		/*
-		 * If splitting a request, and the next starting sector would be
-		 * misaligned, stop the discard at the previous aligned sector.
-		 */
-		end_sect = sector + req_sects;
-		tmp = end_sect;
-		if (req_sects < nr_sects &&
-		    sector_div(tmp, granularity) != alignment) {
-			end_sect = end_sect - alignment;
-			sector_div(end_sect, granularity);
-			end_sect = end_sect * granularity + alignment;
-			req_sects = end_sect - sector;
-		}
-
-		bio->bi_sector = sector;
-		bio->bi_end_io = bio_batch_end_io;
-		bio->bi_bdev = bdev;
-		bio->bi_private = &bb;
+		bio = bio_alloc(bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
+		bio->bi_iter.bi_sector = sector;
+		if (flags & BLKDEV_ZERO_NOUNMAP)
+			bio->bi_opf |= REQ_NOUNMAP;
 
-		bio->bi_size = req_sects << 9;
-		nr_sects -= req_sects;
-		sector = end_sect;
+		bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+		*biop = bio_chain_and_submit(*biop, bio);
 
-		atomic_inc(&bb.done);
-		submit_bio(type, bio);
+		nr_sects -= len;
+		sector += len;
+		cond_resched();
 	}
-	blk_finish_plug(&plug);
+}
 
-	/* Wait for bios in-flight */
-	if (!atomic_dec_and_test(&bb.done))
-		wait_for_completion_io(&wait);
+static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp, unsigned flags)
+{
+	sector_t limit = bio_write_zeroes_limit(bdev);
+	struct bio *bio = NULL;
+	struct blk_plug plug;
+	int ret = 0;
 
-	if (!test_bit(BIO_UPTODATE, &bb.flags))
-		ret = -EIO;
+	blk_start_plug(&plug);
+	__blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio,
+			flags, limit);
+	if (bio) {
+		if ((flags & BLKDEV_ZERO_KILLABLE) &&
+		    fatal_signal_pending(current)) {
+			bio_await_chain(bio);
+			blk_finish_plug(&plug);
+			return -EINTR;
+		}
+		ret = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+	blk_finish_plug(&plug);
 
+	/*
+	 * For some devices there is no non-destructive way to verify whether
+	 * WRITE ZEROES is actually supported. These will clear the capability
+	 * on an I/O error, in which case we'll turn any error into
+	 * "not supported" here.
+	 */
+	if (ret && !bdev_write_zeroes_sectors(bdev))
+		return -EOPNOTSUPP;
 	return ret;
 }
-EXPORT_SYMBOL(blkdev_issue_discard);
 
-/**
- * blkdev_issue_write_same - queue a write same operation
- * @bdev:	target blockdev
- * @sector:	start sector
- * @nr_sects:	number of sectors to write
- * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @page:	page containing data to write
- *
- * Description:
- *    Issue a write same request for the sectors in question.
+/*
+ * Convert a number of 512B sectors to a number of pages.
+ * The result is limited to a number of pages that can fit into a BIO.
+ * Also make sure that the result is always at least 1 (page) for the cases
+ * where nr_sects is lower than the number of sectors in a page.
  */
-int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
-			    sector_t nr_sects, gfp_t gfp_mask,
-			    struct page *page)
+static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
 {
-	DECLARE_COMPLETION_ONSTACK(wait);
-	struct request_queue *q = bdev_get_queue(bdev);
-	unsigned int max_write_same_sectors;
-	struct bio_batch bb;
-	struct bio *bio;
-	int ret = 0;
-
-	if (!q)
-		return -ENXIO;
+	sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);
 
-	max_write_same_sectors = q->limits.max_write_same_sectors;
-
-	if (max_write_same_sectors == 0)
-		return -EOPNOTSUPP;
+	return min(pages, (sector_t)BIO_MAX_VECS);
+}
 
-	atomic_set(&bb.done, 1);
-	bb.flags = 1 << BIO_UPTODATE;
-	bb.wait = &wait;
+static void __blkdev_issue_zero_pages(struct block_device *bdev,
+		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+		struct bio **biop, unsigned int flags)
+{
+	struct folio *zero_folio = largest_zero_folio();
 
 	while (nr_sects) {
-		bio = bio_alloc(gfp_mask, 1);
-		if (!bio) {
-			ret = -ENOMEM;
+		unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects);
+		struct bio *bio;
+
+		if ((flags & BLKDEV_ZERO_KILLABLE) &&
+		    fatal_signal_pending(current))
 			break;
-		}
 
-		bio->bi_sector = sector;
-		bio->bi_end_io = bio_batch_end_io;
-		bio->bi_bdev = bdev;
-		bio->bi_private = &bb;
-		bio->bi_vcnt = 1;
-		bio->bi_io_vec->bv_page = page;
-		bio->bi_io_vec->bv_offset = 0;
-		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
-
-		if (nr_sects > max_write_same_sectors) {
-			bio->bi_size = max_write_same_sectors << 9;
-			nr_sects -= max_write_same_sectors;
-			sector += max_write_same_sectors;
-		} else {
-			bio->bi_size = nr_sects << 9;
-			nr_sects = 0;
-		}
+		bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask);
+		bio->bi_iter.bi_sector = sector;
+
+		do {
+			unsigned int len;
+
+			len = min_t(sector_t, folio_size(zero_folio),
+					nr_sects << SECTOR_SHIFT);
+			if (!bio_add_folio(bio, zero_folio, len, 0))
+				break;
+			nr_sects -= len >> SECTOR_SHIFT;
+			sector += len >> SECTOR_SHIFT;
+		} while (nr_sects);
 
-		atomic_inc(&bb.done);
-		submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
+		*biop = bio_chain_and_submit(*biop, bio);
+		cond_resched();
 	}
+}
 
-	/* Wait for bios in-flight */
-	if (!atomic_dec_and_test(&bb.done))
-		wait_for_completion_io(&wait);
+static int blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp, unsigned flags)
+{
+	struct bio *bio = NULL;
+	struct blk_plug plug;
+	int ret = 0;
 
-	if (!test_bit(BIO_UPTODATE, &bb.flags))
-		ret = -ENOTSUPP;
+	if (flags & BLKDEV_ZERO_NOFALLBACK)
+		return -EOPNOTSUPP;
+
+	blk_start_plug(&plug);
+	__blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp, &bio, flags);
+	if (bio) {
+		if ((flags & BLKDEV_ZERO_KILLABLE) &&
+		    fatal_signal_pending(current)) {
+			bio_await_chain(bio);
+			blk_finish_plug(&plug);
+			return -EINTR;
+		}
+		ret = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+	blk_finish_plug(&plug);
 
 	return ret;
 }
-EXPORT_SYMBOL(blkdev_issue_write_same);
 
 /**
- * blkdev_issue_zeroout - generate number of zero filed write bios
+ * __blkdev_issue_zeroout - generate number of zero filed write bios
  * @bdev:	blockdev to issue
  * @sector:	start sector
  * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
+ * @biop:	pointer to anchor bio
+ * @flags:	controls detailed behavior
  *
  * Description:
- *  Generate and issue number of bios with zerofiled pages.
+ *  Zero-fill a block range, either using hardware offload or by explicitly
+ *  writing zeroes to the device.
+ *
+ *  If a device is using logical block provisioning, the underlying space will
+ *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
+ *
+ *  If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
+ *  -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
  */
-
 int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
-			sector_t nr_sects, gfp_t gfp_mask)
+		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
+		unsigned flags)
 {
-	int ret;
-	struct bio *bio;
-	struct bio_batch bb;
-	unsigned int sz;
-	DECLARE_COMPLETION_ONSTACK(wait);
-
-	atomic_set(&bb.done, 1);
-	bb.flags = 1 << BIO_UPTODATE;
-	bb.wait = &wait;
-
-	ret = 0;
-	while (nr_sects != 0) {
-		bio = bio_alloc(gfp_mask,
-				min(nr_sects, (sector_t)BIO_MAX_PAGES));
-		if (!bio) {
-			ret = -ENOMEM;
-			break;
-		}
-
-		bio->bi_sector = sector;
-		bio->bi_bdev = bdev;
-		bio->bi_end_io = bio_batch_end_io;
-		bio->bi_private = &bb;
-
-		while (nr_sects != 0) {
-			sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
-			ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
-			nr_sects -= ret >> 9;
-			sector += ret >> 9;
-			if (ret < (sz << 9))
-				break;
-		}
-		ret = 0;
-		atomic_inc(&bb.done);
-		submit_bio(WRITE, bio);
-	}
+	sector_t limit = bio_write_zeroes_limit(bdev);
 
-	/* Wait for bios in-flight */
-	if (!atomic_dec_and_test(&bb.done))
-		wait_for_completion_io(&wait);
+	if (bdev_read_only(bdev))
+		return -EPERM;
 
-	if (!test_bit(BIO_UPTODATE, &bb.flags))
-		/* One of bios in the batch was completed with error.*/
-		ret = -EIO;
-
-	return ret;
+	if (limit) {
+		__blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+				gfp_mask, biop, flags, limit);
+	} else {
+		if (flags & BLKDEV_ZERO_NOFALLBACK)
+			return -EOPNOTSUPP;
+		__blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
+				biop, flags);
+	}
+	return 0;
 }
+EXPORT_SYMBOL(__blkdev_issue_zeroout);
 
 /**
  * blkdev_issue_zeroout - zero-fill a block range
@@ -277,25 +299,73 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
  * @sector:	start sector
  * @nr_sects:	number of sectors to write
  * @gfp_mask:	memory allocation flags (for bio_alloc)
+ * @flags:	controls detailed behavior
  *
  * Description:
- *  Generate and issue number of bios with zerofiled pages.
+ *  Zero-fill a block range, either using hardware offload or by explicitly
+ *  writing zeroes to the device.  See __blkdev_issue_zeroout() for the
+ *  valid values for %flags.
  */
-
 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
-			sector_t nr_sects, gfp_t gfp_mask)
+		sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
 {
-	if (bdev_write_same(bdev)) {
-		unsigned char bdn[BDEVNAME_SIZE];
+	int ret;
 
-		if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
-					     ZERO_PAGE(0)))
-			return 0;
+	if ((sector | nr_sects) & ((bdev_logical_block_size(bdev) >> 9) - 1))
+		return -EINVAL;
+	if (bdev_read_only(bdev))
+		return -EPERM;
 
-		bdevname(bdev, bdn);
-		pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
+	if (bdev_write_zeroes_sectors(bdev)) {
+		ret = blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+				gfp_mask, flags);
+		if (ret != -EOPNOTSUPP)
+			return ret;
 	}
 
-	return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
+	return blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, flags);
 }
 EXPORT_SYMBOL(blkdev_issue_zeroout);
+
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp)
+{
+	sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+	unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev);
+	struct bio *bio = NULL;
+	struct blk_plug plug;
+	int ret = 0;
+
+	/* make sure that "len << SECTOR_SHIFT" doesn't overflow */
+	if (max_sectors > UINT_MAX >> SECTOR_SHIFT)
+		max_sectors = UINT_MAX >> SECTOR_SHIFT;
+	max_sectors &= ~bs_mask;
+
+	if (max_sectors == 0)
+		return -EOPNOTSUPP;
+	if ((sector | nr_sects) & bs_mask)
+		return -EINVAL;
+	if (bdev_read_only(bdev))
+		return -EPERM;
+
+	blk_start_plug(&plug);
+	while (nr_sects) {
+		unsigned int len = min_t(sector_t, nr_sects, max_sectors);
+
+		bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
+		bio->bi_iter.bi_sector = sector;
+		bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+
+		sector += len;
+		nr_sects -= len;
+		cond_resched();
+	}
+	if (bio) {
+		ret = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+	blk_finish_plug(&plug);
+
+	return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_secure_erase);
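A brief worked example of the discard-splitting arithmetic in bio_discard_limit() above (the device parameters here are assumed for illustration, not taken from the patch): with a discard_granularity of 1 MiB, the granularity is 2048 sectors of 512 bytes. For a discard starting at sector 3000, round_up(3000, 2048) = 4096, so the first bio is capped at 4096 - 3000 = 1096 sectors; every subsequent bio then starts on a granularity boundary and is capped at round_down(UINT_MAX, 1048576) >> 9 sectors. This is what lets a driver split the chained bios at granularity boundaries without redoing the alignment math that the old sector_div()-based loop performed inline.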
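For readers coming from the old API, here is a minimal sketch of how an in-kernel caller might use the post-patch entry points. It is illustrative only: zero_range() and discard_range() are hypothetical names, and only the blkdev_issue_* signatures are taken from the code above.

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Hypothetical wrapper: zero a range, preferring the WRITE ZEROES offload. */
static int zero_range(struct block_device *bdev, sector_t sector,
		      sector_t nr_sects)
{
	/*
	 * BLKDEV_ZERO_NOUNMAP asks the device to write zeroes rather than
	 * merely deallocate; drop the flag if deallocation is acceptable.
	 */
	return blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
				    BLKDEV_ZERO_NOUNMAP);
}

/* Hypothetical wrapper: the discard helper now splits bios internally. */
static int discard_range(struct block_device *bdev, sector_t sector,
			 sector_t nr_sects)
{
	return blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL);
}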
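The bio_chain_and_submit() pattern that blkdev_issue_discard() uses internally also lets a caller batch several ranges under one plug and wait once for the whole chain. A hedged sketch under the same assumptions (batch_discard() is a hypothetical caller; the body mirrors blkdev_issue_discard() above):

#include <linux/bio.h>
#include <linux/blkdev.h>

static int batch_discard(struct block_device *bdev, const sector_t *starts,
			 const sector_t *lens, int nr_ranges)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int i, ret = 0;

	blk_start_plug(&plug);
	for (i = 0; i < nr_ranges; i++)
		__blkdev_issue_discard(bdev, starts[i], lens[i], GFP_KERNEL,
				       &bio);
	if (bio) {
		/* Waits for every bio in the chain, not just the last one. */
		ret = submit_bio_wait(bio);
		if (ret == -EOPNOTSUPP)
			ret = 0;
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	return ret;
}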
