Diffstat (limited to 'block/blk-lib.c')
 -rw-r--r--  block/blk-lib.c  |  460
1 file changed, 227 insertions, 233 deletions
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 3fe0aec90597..9e2cc58f881f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Functions related to generic helpers functions
  */
@@ -9,91 +10,64 @@
 #include "blk.h"
 
-static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
-		gfp_t gfp)
+static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
 {
-	struct bio *new = bio_alloc(gfp, nr_pages);
-
-	if (bio) {
-		bio_chain(bio, new);
-		submit_bio(bio);
-	}
+	unsigned int discard_granularity = bdev_discard_granularity(bdev);
+	sector_t granularity_aligned_sector;
+
+	if (bdev_is_partition(bdev))
+		sector += bdev->bd_start_sect;
+
+	granularity_aligned_sector =
+		round_up(sector, discard_granularity >> SECTOR_SHIFT);
+
+	/*
+	 * Make sure subsequent bios start aligned to the discard granularity if
+	 * it needs to be split.
+	 */
+	if (granularity_aligned_sector != sector)
+		return granularity_aligned_sector - sector;
+
+	/*
+	 * Align the bio size to the discard granularity to make splitting the bio
+	 * at discard granularity boundaries easier in the driver if needed.
+	 */
+	return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
+}
 
-	return new;
+struct bio *blk_alloc_discard_bio(struct block_device *bdev,
+		sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask)
+{
+	sector_t bio_sects = min(*nr_sects, bio_discard_limit(bdev, *sector));
+	struct bio *bio;
+
+	if (!bio_sects)
+		return NULL;
+
+	bio = bio_alloc(bdev, 0, REQ_OP_DISCARD, gfp_mask);
+	if (!bio)
+		return NULL;
+	bio->bi_iter.bi_sector = *sector;
+	bio->bi_iter.bi_size = bio_sects << SECTOR_SHIFT;
+	*sector += bio_sects;
+	*nr_sects -= bio_sects;
+	/*
+	 * We can loop for a long time in here if someone does full device
+	 * discards (like mkfs). Be nice and allow us to schedule out to avoid
+	 * softlocking if preempt is disabled.
+	 */
+	cond_resched();
+	return bio;
 }
 
 int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, int flags,
-		struct bio **biop)
+		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
-	struct bio *bio = *biop;
-	unsigned int granularity;
-	unsigned int op;
-	int alignment;
-	sector_t bs_mask;
-
-	if (!q)
-		return -ENXIO;
-
-	if (flags & BLKDEV_DISCARD_SECURE) {
-		if (!blk_queue_secure_erase(q))
-			return -EOPNOTSUPP;
-		op = REQ_OP_SECURE_ERASE;
-	} else {
-		if (!blk_queue_discard(q))
-			return -EOPNOTSUPP;
-		op = REQ_OP_DISCARD;
-	}
+	struct bio *bio;
 
-	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
-	if ((sector | nr_sects) & bs_mask)
-		return -EINVAL;
-
-	/* Zero-sector (unknown) and one-sector granularities are the same. */
-	granularity = max(q->limits.discard_granularity >> 9, 1U);
-	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
-
-	while (nr_sects) {
-		unsigned int req_sects;
-		sector_t end_sect, tmp;
-
-		/* Make sure bi_size doesn't overflow */
-		req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
-
-		/**
-		 * If splitting a request, and the next starting sector would be
-		 * misaligned, stop the discard at the previous aligned sector.
-		 */
-		end_sect = sector + req_sects;
-		tmp = end_sect;
-		if (req_sects < nr_sects &&
-		    sector_div(tmp, granularity) != alignment) {
-			end_sect = end_sect - alignment;
-			sector_div(end_sect, granularity);
-			end_sect = end_sect * granularity + alignment;
-			req_sects = end_sect - sector;
-		}
-
-		bio = next_bio(bio, 0, gfp_mask);
-		bio->bi_iter.bi_sector = sector;
-		bio->bi_bdev = bdev;
-		bio_set_op_attrs(bio, op, 0);
-
-		bio->bi_iter.bi_size = req_sects << 9;
-		nr_sects -= req_sects;
-		sector = end_sect;
-
-		/*
-		 * We can loop for a long time in here, if someone does
-		 * full device discards (like mkfs). Be nice and allow
-		 * us to schedule out to avoid softlocking if preempt
-		 * is disabled.
-		 */
-		cond_resched();
-	}
-
-	*biop = bio;
+	while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
+			gfp_mask)))
+		*biop = bio_chain_and_submit(*biop, bio);
 	return 0;
 }
 EXPORT_SYMBOL(__blkdev_issue_discard);
@@ -104,22 +78,20 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
  * @sector:	start sector
  * @nr_sects:	number of sectors to discard
  * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @flags:	BLKDEV_DISCARD_* flags to control behaviour
  *
  * Description:
  *    Issue a discard request for the sectors in question.
  */
 int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+		sector_t nr_sects, gfp_t gfp_mask)
 {
 	struct bio *bio = NULL;
 	struct blk_plug plug;
-	int ret;
+	int ret = 0;
 
 	blk_start_plug(&plug);
-	ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
-			&bio);
-	if (!ret && bio) {
+	__blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio);
+	if (bio) {
 		ret = submit_bio_wait(bio);
 		if (ret == -EOPNOTSUPP)
 			ret = 0;
@@ -131,147 +103,153 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
 
-/**
- * __blkdev_issue_write_same - generate number of bios with same page
- * @bdev:	target blockdev
- * @sector:	start sector
- * @nr_sects:	number of sectors to write
- * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @page:	page containing data to write
- * @biop:	pointer to anchor bio
- *
- * Description:
- *  Generate and issue number of bios(REQ_OP_WRITE_SAME) with same page.
- */
-static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, struct page *page,
-		struct bio **biop)
+static sector_t bio_write_zeroes_limit(struct block_device *bdev)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
-	unsigned int max_write_same_sectors;
-	struct bio *bio = *biop;
-	sector_t bs_mask;
+	sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
 
-	if (!q)
-		return -ENXIO;
+	return min(bdev_write_zeroes_sectors(bdev),
+		(UINT_MAX >> SECTOR_SHIFT) & ~bs_mask);
+}
 
-	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
-	if ((sector | nr_sects) & bs_mask)
-		return -EINVAL;
+/*
+ * There is no reliable way for the SCSI subsystem to determine whether a
+ * device supports a WRITE SAME operation without actually performing a write
+ * to media. As a result, write_zeroes is enabled by default and will be
+ * disabled if a zeroing operation subsequently fails. This means that this
+ * queue limit is likely to change at runtime.
+ */
+static void __blkdev_issue_write_zeroes(struct block_device *bdev,
+		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+		struct bio **biop, unsigned flags, sector_t limit)
+{
-	if (!bdev_write_same(bdev))
-		return -EOPNOTSUPP;
+	while (nr_sects) {
+		unsigned int len = min(nr_sects, limit);
+		struct bio *bio;
 
-	/* Ensure that max_write_same_sectors doesn't overflow bi_size */
-	max_write_same_sectors = UINT_MAX >> 9;
+		if ((flags & BLKDEV_ZERO_KILLABLE) &&
+		    fatal_signal_pending(current))
+			break;
 
-	while (nr_sects) {
-		bio = next_bio(bio, 1, gfp_mask);
+		bio = bio_alloc(bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
-		bio->bi_bdev = bdev;
-		bio->bi_vcnt = 1;
-		bio->bi_io_vec->bv_page = page;
-		bio->bi_io_vec->bv_offset = 0;
-		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
-		bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);
-
-		if (nr_sects > max_write_same_sectors) {
-			bio->bi_iter.bi_size = max_write_same_sectors << 9;
-			nr_sects -= max_write_same_sectors;
-			sector += max_write_same_sectors;
-		} else {
-			bio->bi_iter.bi_size = nr_sects << 9;
-			nr_sects = 0;
-		}
+		if (flags & BLKDEV_ZERO_NOUNMAP)
+			bio->bi_opf |= REQ_NOUNMAP;
+
+		bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+		*biop = bio_chain_and_submit(*biop, bio);
+
+		nr_sects -= len;
+		sector += len;
 		cond_resched();
 	}
-
-	*biop = bio;
-	return 0;
 }
 
-/**
- * blkdev_issue_write_same - queue a write same operation
- * @bdev:	target blockdev
- * @sector:	start sector
- * @nr_sects:	number of sectors to write
- * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @page:	page containing data
- *
- * Description:
- *    Issue a write same request for the sectors in question.
- */
-int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
-				sector_t nr_sects, gfp_t gfp_mask,
-				struct page *page)
+static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp, unsigned flags)
 {
+	sector_t limit = bio_write_zeroes_limit(bdev);
 	struct bio *bio = NULL;
 	struct blk_plug plug;
-	int ret;
+	int ret = 0;
 
 	blk_start_plug(&plug);
-	ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
-			&bio);
-	if (ret == 0 && bio) {
+	__blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio,
+			flags, limit);
+	if (bio) {
+		if ((flags & BLKDEV_ZERO_KILLABLE) &&
+		    fatal_signal_pending(current)) {
+			bio_await_chain(bio);
+			blk_finish_plug(&plug);
+			return -EINTR;
+		}
 		ret = submit_bio_wait(bio);
 		bio_put(bio);
 	}
 	blk_finish_plug(&plug);
+
+	/*
+	 * For some devices there is no non-destructive way to verify whether
+	 * WRITE ZEROES is actually supported. These will clear the capability
+	 * on an I/O error, in which case we'll turn any error into
+	 * "not supported" here.
+	 */
+	if (ret && !bdev_write_zeroes_sectors(bdev))
+		return -EOPNOTSUPP;
 	return ret;
 }
-EXPORT_SYMBOL(blkdev_issue_write_same);
 
-static int __blkdev_issue_write_zeroes(struct block_device *bdev,
-		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
-		struct bio **biop, unsigned flags)
+/*
+ * Convert a number of 512B sectors to a number of pages.
+ * The result is limited to a number of pages that can fit into a BIO.
+ * Also make sure that the result is always at least 1 (page) for the cases
+ * where nr_sects is lower than the number of sectors in a page.
+ */
+static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
 {
-	struct bio *bio = *biop;
-	unsigned int max_write_zeroes_sectors;
-	struct request_queue *q = bdev_get_queue(bdev);
+	sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);
 
-	if (!q)
-		return -ENXIO;
-
-	/* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
-	max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);
+	return min(pages, (sector_t)BIO_MAX_VECS);
+}
 
-	if (max_write_zeroes_sectors == 0)
-		return -EOPNOTSUPP;
+static void __blkdev_issue_zero_pages(struct block_device *bdev,
+		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+		struct bio **biop, unsigned int flags)
+{
+	struct folio *zero_folio = largest_zero_folio();
 
 	while (nr_sects) {
-		bio = next_bio(bio, 0, gfp_mask);
+		unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects);
+		struct bio *bio;
+
+		if ((flags & BLKDEV_ZERO_KILLABLE) &&
+		    fatal_signal_pending(current))
+			break;
+
+		bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
-		bio->bi_bdev = bdev;
-		bio->bi_opf = REQ_OP_WRITE_ZEROES;
-		if (flags & BLKDEV_ZERO_NOUNMAP)
-			bio->bi_opf |= REQ_NOUNMAP;
 
-		if (nr_sects > max_write_zeroes_sectors) {
-			bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
-			nr_sects -= max_write_zeroes_sectors;
-			sector += max_write_zeroes_sectors;
-		} else {
-			bio->bi_iter.bi_size = nr_sects << 9;
-			nr_sects = 0;
-		}
+		do {
+			unsigned int len;
+
+			len = min_t(sector_t, folio_size(zero_folio),
+				nr_sects << SECTOR_SHIFT);
+			if (!bio_add_folio(bio, zero_folio, len, 0))
+				break;
+			nr_sects -= len >> SECTOR_SHIFT;
+			sector += len >> SECTOR_SHIFT;
+		} while (nr_sects);
+
+		*biop = bio_chain_and_submit(*biop, bio);
 		cond_resched();
 	}
-
-	*biop = bio;
-	return 0;
 }
 
-/*
- * Convert a number of 512B sectors to a number of pages.
- * The result is limited to a number of pages that can fit into a BIO.
- * Also make sure that the result is always at least 1 (page) for the cases
- * where nr_sects is lower than the number of sectors in a page.
- */
-static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
+static int blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp, unsigned flags)
 {
-	sector_t bytes = (nr_sects << 9) + PAGE_SIZE - 1;
+	struct bio *bio = NULL;
+	struct blk_plug plug;
+	int ret = 0;
 
-	return min(bytes >> PAGE_SHIFT, (sector_t)BIO_MAX_PAGES);
+	if (flags & BLKDEV_ZERO_NOFALLBACK)
+		return -EOPNOTSUPP;
+
+	blk_start_plug(&plug);
+	__blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp, &bio, flags);
+	if (bio) {
+		if ((flags & BLKDEV_ZERO_KILLABLE) &&
+		    fatal_signal_pending(current)) {
+			bio_await_chain(bio);
+			blk_finish_plug(&plug);
+			return -EINTR;
+		}
+		ret = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+	blk_finish_plug(&plug);
+
+	return ret;
 }
 
 /**
@@ -287,12 +265,6 @@ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
  *  Zero-fill a block range, either using hardware offload or by explicitly
  *  writing zeroes to the device.
  *
- *  Note that this function may fail with -EOPNOTSUPP if the driver signals
- *  zeroing offload support, but the device fails to process the command (for
- *  some devices there is no non-destructive way to verify whether this
- *  operation is actually supported). In this case the caller should call
- *  retry the call to blkdev_issue_zeroout() and the fallback path will be used.
- *
  *  If a device is using logical block provisioning, the underlying space will
  *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
  *
@@ -303,43 +275,21 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
 		unsigned flags)
 {
-	int ret;
-	int bi_size = 0;
-	struct bio *bio = *biop;
-	unsigned int sz;
-	sector_t bs_mask;
+	sector_t limit = bio_write_zeroes_limit(bdev);
 
-	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
-	if ((sector | nr_sects) & bs_mask)
-		return -EINVAL;
+	if (bdev_read_only(bdev))
+		return -EPERM;
 
-	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
-			biop, flags);
-	if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
-		goto out;
-
-	ret = 0;
-	while (nr_sects != 0) {
-		bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
-				gfp_mask);
-		bio->bi_iter.bi_sector = sector;
-		bio->bi_bdev = bdev;
-		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-
-		while (nr_sects != 0) {
-			sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
-			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
-			nr_sects -= bi_size >> 9;
-			sector += bi_size >> 9;
-			if (bi_size < sz)
-				break;
-		}
-		cond_resched();
+	if (limit) {
+		__blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+				gfp_mask, biop, flags, limit);
+	} else {
+		if (flags & BLKDEV_ZERO_NOFALLBACK)
+			return -EOPNOTSUPP;
+		__blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
+				biop, flags);
 	}
-
-	*biop = bio;
-out:
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(__blkdev_issue_zeroout);
 
@@ -360,13 +310,57 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
 {
 	int ret;
+
+	if ((sector | nr_sects) & ((bdev_logical_block_size(bdev) >> 9) - 1))
+		return -EINVAL;
+	if (bdev_read_only(bdev))
+		return -EPERM;
+
+	if (bdev_write_zeroes_sectors(bdev)) {
+		ret = blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+				gfp_mask, flags);
+		if (ret != -EOPNOTSUPP)
+			return ret;
+	}
+
+	return blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, flags);
+}
+EXPORT_SYMBOL(blkdev_issue_zeroout);
+
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp)
+{
+	sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+	unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev);
 	struct bio *bio = NULL;
 	struct blk_plug plug;
+	int ret = 0;
+
+	/* make sure that "len << SECTOR_SHIFT" doesn't overflow */
+	if (max_sectors > UINT_MAX >> SECTOR_SHIFT)
+		max_sectors = UINT_MAX >> SECTOR_SHIFT;
+	max_sectors &= ~bs_mask;
+
+	if (max_sectors == 0)
+		return -EOPNOTSUPP;
+	if ((sector | nr_sects) & bs_mask)
+		return -EINVAL;
+	if (bdev_read_only(bdev))
+		return -EPERM;
 
 	blk_start_plug(&plug);
-	ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
-			&bio, flags);
-	if (ret == 0 && bio) {
+	while (nr_sects) {
+		unsigned int len = min_t(sector_t, nr_sects, max_sectors);
+
+		bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
+		bio->bi_iter.bi_sector = sector;
+		bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+
+		sector += len;
+		nr_sects -= len;
+		cond_resched();
+	}
+	if (bio) {
 		ret = submit_bio_wait(bio);
 		bio_put(bio);
 	}
@@ -374,4 +368,4 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 
 	return ret;
 }
-EXPORT_SYMBOL(blkdev_issue_zeroout);
+EXPORT_SYMBOL(blkdev_issue_secure_erase);
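For context, a minimal sketch (not part of the diff above) of how an in-kernel caller might drive the reworked helpers. The wrapper function and its error-handling policy are made up for illustration; only the helper calls follow the new signatures shown in the diff: blkdev_issue_discard() without a flags argument, blkdev_issue_zeroout() with the BLKDEV_ZERO_* flags, and the new blkdev_issue_secure_erase().

/*
 * Illustrative sketch only -- not part of the commit above. The wrapper
 * function is hypothetical; the helper signatures follow the new code.
 */
#include <linux/blkdev.h>

static int example_trim_then_zero(struct block_device *bdev, sector_t start,
		sector_t nr_sects)
{
	int ret;

	/* Discard no longer takes a flags argument. */
	ret = blkdev_issue_discard(bdev, start, nr_sects, GFP_KERNEL);
	if (ret)
		return ret;

	/*
	 * Ask for the hardware offload only; with BLKDEV_ZERO_NOFALLBACK the
	 * helper returns -EOPNOTSUPP instead of writing zero pages manually.
	 */
	ret = blkdev_issue_zeroout(bdev, start, nr_sects, GFP_KERNEL,
				   BLKDEV_ZERO_NOFALLBACK);
	if (ret == -EOPNOTSUPP)
		ret = blkdev_issue_zeroout(bdev, start, nr_sects,
					   GFP_KERNEL, 0);
	if (ret)
		return ret;

	/* Secure erase is a separate helper now, not a discard flag. */
	if (bdev_max_secure_erase_sectors(bdev))
		ret = blkdev_issue_secure_erase(bdev, start, nr_sects,
						GFP_KERNEL);
	return ret;
}

As the diff shows, secure erase is requested through its own helper rather than the old BLKDEV_DISCARD_SECURE flag, and zeroing falls back to writing zero pages unless BLKDEV_ZERO_NOFALLBACK is set.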
