Diffstat (limited to 'block/blk-lib.c')
| -rw-r--r-- | block/blk-lib.c | 497 |
1 file changed, 213 insertions, 284 deletions
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9f09beadcbe3..9e2cc58f881f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -10,110 +10,64 @@
 #include "blk.h"
 
-struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp)
+static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
 {
-	struct bio *new = bio_alloc(gfp, nr_pages);
+	unsigned int discard_granularity = bdev_discard_granularity(bdev);
+	sector_t granularity_aligned_sector;
 
-	if (bio) {
-		bio_chain(bio, new);
-		submit_bio(bio);
-	}
-
-	return new;
+	if (bdev_is_partition(bdev))
+		sector += bdev->bd_start_sect;
+
+	granularity_aligned_sector =
+		round_up(sector, discard_granularity >> SECTOR_SHIFT);
+
+	/*
+	 * Make sure subsequent bios start aligned to the discard granularity if
+	 * it needs to be split.
+	 */
+	if (granularity_aligned_sector != sector)
+		return granularity_aligned_sector - sector;
+
+	/*
+	 * Align the bio size to the discard granularity to make splitting the bio
+	 * at discard granularity boundaries easier in the driver if needed.
+	 */
+	return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
 }
-EXPORT_SYMBOL_GPL(blk_next_bio);
 
-int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, int flags,
-		struct bio **biop)
+struct bio *blk_alloc_discard_bio(struct block_device *bdev,
+		sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
-	struct bio *bio = *biop;
-	unsigned int op;
-	sector_t bs_mask, part_offset = 0;
-
-	if (!q)
-		return -ENXIO;
-
-	if (bdev_read_only(bdev))
-		return -EPERM;
-
-	if (flags & BLKDEV_DISCARD_SECURE) {
-		if (!blk_queue_secure_erase(q))
-			return -EOPNOTSUPP;
-		op = REQ_OP_SECURE_ERASE;
-	} else {
-		if (!blk_queue_discard(q))
-			return -EOPNOTSUPP;
-		op = REQ_OP_DISCARD;
-	}
-
-	/* In case the discard granularity isn't set by buggy device driver */
-	if (WARN_ON_ONCE(!q->limits.discard_granularity)) {
-		char dev_name[BDEVNAME_SIZE];
-
-		bdevname(bdev, dev_name);
-		pr_err_ratelimited("%s: Error: discard_granularity is 0.\n", dev_name);
-		return -EOPNOTSUPP;
-	}
-
-	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
-	if ((sector | nr_sects) & bs_mask)
-		return -EINVAL;
-
-	if (!nr_sects)
-		return -EINVAL;
+	sector_t bio_sects = min(*nr_sects, bio_discard_limit(bdev, *sector));
+	struct bio *bio;
 
-	/* In case the discard request is in a partition */
-	if (bdev_is_partition(bdev))
-		part_offset = bdev->bd_start_sect;
+	if (!bio_sects)
+		return NULL;
+
+	bio = bio_alloc(bdev, 0, REQ_OP_DISCARD, gfp_mask);
+	if (!bio)
+		return NULL;
+	bio->bi_iter.bi_sector = *sector;
+	bio->bi_iter.bi_size = bio_sects << SECTOR_SHIFT;
+	*sector += bio_sects;
+	*nr_sects -= bio_sects;
+	/*
+	 * We can loop for a long time in here if someone does full device
+	 * discards (like mkfs). Be nice and allow us to schedule out to avoid
+	 * softlocking if preempt is disabled.
+	 */
+	cond_resched();
+	return bio;
+}
 
-	while (nr_sects) {
-		sector_t granularity_aligned_lba, req_sects;
-		sector_t sector_mapped = sector + part_offset;
-
-		granularity_aligned_lba = round_up(sector_mapped,
-			q->limits.discard_granularity >> SECTOR_SHIFT);
-
-		/*
-		 * Check whether the discard bio starts at a discard_granularity
-		 * aligned LBA,
-		 * - If no: set (granularity_aligned_lba - sector_mapped) to
-		 *   bi_size of the first split bio, then the second bio will
-		 *   start at a discard_granularity aligned LBA on the device.
-		 * - If yes: use bio_aligned_discard_max_sectors() as the max
-		 *   possible bi_size of the first split bio. Then when this bio
-		 *   is split in device drive, the split ones are very probably
-		 *   to be aligned to discard_granularity of the device's queue.
-		 */
-		if (granularity_aligned_lba == sector_mapped)
-			req_sects = min_t(sector_t, nr_sects,
-					  bio_aligned_discard_max_sectors(q));
-		else
-			req_sects = min_t(sector_t, nr_sects,
-					  granularity_aligned_lba - sector_mapped);
-
-		WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
-
-		bio = blk_next_bio(bio, 0, gfp_mask);
-		bio->bi_iter.bi_sector = sector;
-		bio_set_dev(bio, bdev);
-		bio_set_op_attrs(bio, op, 0);
-
-		bio->bi_iter.bi_size = req_sects << 9;
-		sector += req_sects;
-		nr_sects -= req_sects;
-
-		/*
-		 * We can loop for a long time in here, if someone does
-		 * full device discards (like mkfs). Be nice and allow
-		 * us to schedule out to avoid softlocking if preempt
-		 * is disabled.
-		 */
-		cond_resched();
-	}
+int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
+{
+	struct bio *bio;
 
-	*biop = bio;
+	while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
+			gfp_mask)))
+		*biop = bio_chain_and_submit(*biop, bio);
 	return 0;
 }
 EXPORT_SYMBOL(__blkdev_issue_discard);
@@ -124,22 +78,20 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
  * @sector:	start sector
  * @nr_sects:	number of sectors to discard
  * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @flags:	BLKDEV_DISCARD_* flags to control behaviour
  *
  * Description:
  *    Issue a discard request for the sectors in question.
  */
 int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+		sector_t nr_sects, gfp_t gfp_mask)
 {
 	struct bio *bio = NULL;
 	struct blk_plug plug;
-	int ret;
+	int ret = 0;
 
 	blk_start_plug(&plug);
-	ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
-			&bio);
-	if (!ret && bio) {
+	__blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio);
+	if (bio) {
 		ret = submit_bio_wait(bio);
 		if (ret == -EOPNOTSUPP)
 			ret = 0;
@@ -151,140 +103,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
 
-/**
- * __blkdev_issue_write_same - generate number of bios with same page
- * @bdev:	target blockdev
- * @sector:	start sector
- * @nr_sects:	number of sectors to write
- * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @page:	page containing data to write
- * @biop:	pointer to anchor bio
- *
- * Description:
- *  Generate and issue number of bios(REQ_OP_WRITE_SAME) with same page.
- */
-static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, struct page *page,
-		struct bio **biop)
+static sector_t bio_write_zeroes_limit(struct block_device *bdev)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
-	unsigned int max_write_same_sectors;
-	struct bio *bio = *biop;
-	sector_t bs_mask;
+	sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
 
-	if (!q)
-		return -ENXIO;
-
-	if (bdev_read_only(bdev))
-		return -EPERM;
+	return min(bdev_write_zeroes_sectors(bdev),
+		(UINT_MAX >> SECTOR_SHIFT) & ~bs_mask);
+}
 
-	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
-	if ((sector | nr_sects) & bs_mask)
-		return -EINVAL;
+/*
+ * There is no reliable way for the SCSI subsystem to determine whether a
+ * device supports a WRITE SAME operation without actually performing a write
+ * to media. As a result, write_zeroes is enabled by default and will be
+ * disabled if a zeroing operation subsequently fails. This means that this
+ * queue limit is likely to change at runtime.
+ */
+static void __blkdev_issue_write_zeroes(struct block_device *bdev,
+		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+		struct bio **biop, unsigned flags, sector_t limit)
+{
-	if (!bdev_write_same(bdev))
-		return -EOPNOTSUPP;
+	while (nr_sects) {
+		unsigned int len = min(nr_sects, limit);
+		struct bio *bio;
 
-	/* Ensure that max_write_same_sectors doesn't overflow bi_size */
-	max_write_same_sectors = bio_allowed_max_sectors(q);
+		if ((flags & BLKDEV_ZERO_KILLABLE) &&
+		    fatal_signal_pending(current))
+			break;
 
-	while (nr_sects) {
-		bio = blk_next_bio(bio, 1, gfp_mask);
+		bio = bio_alloc(bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
-		bio_set_dev(bio, bdev);
-		bio->bi_vcnt = 1;
-		bio->bi_io_vec->bv_page = page;
-		bio->bi_io_vec->bv_offset = 0;
-		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
-		bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);
-
-		if (nr_sects > max_write_same_sectors) {
-			bio->bi_iter.bi_size = max_write_same_sectors << 9;
-			nr_sects -= max_write_same_sectors;
-			sector += max_write_same_sectors;
-		} else {
-			bio->bi_iter.bi_size = nr_sects << 9;
-			nr_sects = 0;
-		}
+		if (flags & BLKDEV_ZERO_NOUNMAP)
+			bio->bi_opf |= REQ_NOUNMAP;
+
+		bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+		*biop = bio_chain_and_submit(*biop, bio);
+
+		nr_sects -= len;
+		sector += len;
 		cond_resched();
 	}
-
-	*biop = bio;
-	return 0;
 }
 
-/**
- * blkdev_issue_write_same - queue a write same operation
- * @bdev:	target blockdev
- * @sector:	start sector
- * @nr_sects:	number of sectors to write
- * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @page:	page containing data
- *
- * Description:
- *    Issue a write same request for the sectors in question.
- */
-int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask,
-		struct page *page)
+static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp, unsigned flags)
 {
+	sector_t limit = bio_write_zeroes_limit(bdev);
 	struct bio *bio = NULL;
 	struct blk_plug plug;
-	int ret;
+	int ret = 0;
 
 	blk_start_plug(&plug);
-	ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
-			&bio);
-	if (ret == 0 && bio) {
+	__blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio,
+			flags, limit);
+	if (bio) {
+		if ((flags & BLKDEV_ZERO_KILLABLE) &&
+		    fatal_signal_pending(current)) {
+			bio_await_chain(bio);
+			blk_finish_plug(&plug);
+			return -EINTR;
+		}
 		ret = submit_bio_wait(bio);
 		bio_put(bio);
 	}
 	blk_finish_plug(&plug);
-	return ret;
-}
-EXPORT_SYMBOL(blkdev_issue_write_same);
 
-static int __blkdev_issue_write_zeroes(struct block_device *bdev,
-		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
-		struct bio **biop, unsigned flags)
-{
-	struct bio *bio = *biop;
-	unsigned int max_write_zeroes_sectors;
-	struct request_queue *q = bdev_get_queue(bdev);
-
-	if (!q)
-		return -ENXIO;
-
-	if (bdev_read_only(bdev))
-		return -EPERM;
-
-	/* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
-	max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);
-
-	if (max_write_zeroes_sectors == 0)
+	/*
+	 * For some devices there is no non-destructive way to verify whether
+	 * WRITE ZEROES is actually supported. These will clear the capability
+	 * on an I/O error, in which case we'll turn any error into
+	 * "not supported" here.
+	 */
+	if (ret && !bdev_write_zeroes_sectors(bdev))
 		return -EOPNOTSUPP;
-
-	while (nr_sects) {
-		bio = blk_next_bio(bio, 0, gfp_mask);
-		bio->bi_iter.bi_sector = sector;
-		bio_set_dev(bio, bdev);
-		bio->bi_opf = REQ_OP_WRITE_ZEROES;
-		if (flags & BLKDEV_ZERO_NOUNMAP)
-			bio->bi_opf |= REQ_NOUNMAP;
-
-		if (nr_sects > max_write_zeroes_sectors) {
-			bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
-			nr_sects -= max_write_zeroes_sectors;
-			sector += max_write_zeroes_sectors;
-		} else {
-			bio->bi_iter.bi_size = nr_sects << 9;
-			nr_sects = 0;
-		}
-		cond_resched();
-	}
-
-	*biop = bio;
-	return 0;
+	return ret;
 }
 
 /*
@@ -300,41 +192,64 @@ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
 	return min(pages, (sector_t)BIO_MAX_VECS);
 }
 
-static int __blkdev_issue_zero_pages(struct block_device *bdev,
+static void __blkdev_issue_zero_pages(struct block_device *bdev,
 		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
-		struct bio **biop)
+		struct bio **biop, unsigned int flags)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
-	struct bio *bio = *biop;
-	int bi_size = 0;
-	unsigned int sz;
+	struct folio *zero_folio = largest_zero_folio();
 
-	if (!q)
-		return -ENXIO;
+	while (nr_sects) {
+		unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects);
+		struct bio *bio;
 
-	if (bdev_read_only(bdev))
-		return -EPERM;
+		if ((flags & BLKDEV_ZERO_KILLABLE) &&
+		    fatal_signal_pending(current))
			break;
 
-	while (nr_sects != 0) {
-		bio = blk_next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
-				   gfp_mask);
+		bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
-		bio_set_dev(bio, bdev);
-		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-
-		while (nr_sects != 0) {
-			sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
-			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
-			nr_sects -= bi_size >> 9;
-			sector += bi_size >> 9;
-			if (bi_size < sz)
+
+		do {
+			unsigned int len;
+
+			len = min_t(sector_t, folio_size(zero_folio),
+				nr_sects << SECTOR_SHIFT);
+			if (!bio_add_folio(bio, zero_folio, len, 0))
 				break;
-		}
+			nr_sects -= len >> SECTOR_SHIFT;
+			sector += len >> SECTOR_SHIFT;
+		} while (nr_sects);
+
+		*biop = bio_chain_and_submit(*biop, bio);
 		cond_resched();
 	}
+}
 
-	*biop = bio;
-	return 0;
+static int blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp, unsigned flags)
+{
+	struct bio *bio = NULL;
+	struct blk_plug plug;
+	int ret = 0;
+
+	if (flags & BLKDEV_ZERO_NOFALLBACK)
+		return -EOPNOTSUPP;
+
+	blk_start_plug(&plug);
+	__blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp, &bio, flags);
+	if (bio) {
+		if ((flags & BLKDEV_ZERO_KILLABLE) &&
+		    fatal_signal_pending(current)) {
+			bio_await_chain(bio);
+			blk_finish_plug(&plug);
+			return -EINTR;
+		}
+		ret = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+	blk_finish_plug(&plug);
+
+	return ret;
 }
 
 /**
@@ -360,20 +275,21 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
 		unsigned flags)
 {
-	int ret;
-	sector_t bs_mask;
+	sector_t limit = bio_write_zeroes_limit(bdev);
 
-	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
-	if ((sector | nr_sects) & bs_mask)
-		return -EINVAL;
-
-	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
-			biop, flags);
-	if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
-		return ret;
+	if (bdev_read_only(bdev))
+		return -EPERM;
 
-	return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
-			biop);
+	if (limit) {
+		__blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+				gfp_mask, biop, flags, limit);
+	} else {
+		if (flags & BLKDEV_ZERO_NOFALLBACK)
+			return -EOPNOTSUPP;
+		__blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
+				biop, flags);
+	}
+	return 0;
 }
 EXPORT_SYMBOL(__blkdev_issue_zeroout);
 
@@ -393,50 +309,63 @@ EXPORT_SYMBOL(__blkdev_issue_zeroout);
 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
 {
-	int ret = 0;
-	sector_t bs_mask;
-	struct bio *bio;
+	int ret;
+
+	if ((sector | nr_sects) & ((bdev_logical_block_size(bdev) >> 9) - 1))
+		return -EINVAL;
+	if (bdev_read_only(bdev))
+		return -EPERM;
+
+	if (bdev_write_zeroes_sectors(bdev)) {
+		ret = blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+				gfp_mask, flags);
+		if (ret != -EOPNOTSUPP)
+			return ret;
+	}
+
+	return blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, flags);
+}
+EXPORT_SYMBOL(blkdev_issue_zeroout);
+
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp)
+{
+	sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+	unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev);
+	struct bio *bio = NULL;
 	struct blk_plug plug;
-	bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);
+	int ret = 0;
 
-	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+	/* make sure that "len << SECTOR_SHIFT" doesn't overflow */
+	if (max_sectors > UINT_MAX >> SECTOR_SHIFT)
+		max_sectors = UINT_MAX >> SECTOR_SHIFT;
+	max_sectors &= ~bs_mask;
+
+	if (max_sectors == 0)
+		return -EOPNOTSUPP;
 	if ((sector | nr_sects) & bs_mask)
 		return -EINVAL;
+	if (bdev_read_only(bdev))
+		return -EPERM;
 
-retry:
-	bio = NULL;
 	blk_start_plug(&plug);
-	if (try_write_zeroes) {
-		ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
-				gfp_mask, &bio, flags);
-	} else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
-		ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
-				gfp_mask, &bio);
-	} else {
-		/* No zeroing offload support */
-		ret = -EOPNOTSUPP;
+	while (nr_sects) {
+		unsigned int len = min_t(sector_t, nr_sects, max_sectors);
+
+		bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
+		bio->bi_iter.bi_sector = sector;
+		bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+
+		sector += len;
+		nr_sects -= len;
+		cond_resched();
 	}
-	if (ret == 0 && bio) {
+	if (bio) {
 		ret = submit_bio_wait(bio);
 		bio_put(bio);
 	}
 	blk_finish_plug(&plug);
 
-	if (ret && try_write_zeroes) {
-		if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
-			try_write_zeroes = false;
-			goto retry;
-		}
-		if (!bdev_write_zeroes_sectors(bdev)) {
-			/*
-			 * Zeroing offload support was indicated, but the
-			 * device reported ILLEGAL REQUEST (for some devices
-			 * there is no non-destructive way to verify whether
-			 * WRITE ZEROES is actually supported).
-			 */
-			ret = -EOPNOTSUPP;
-		}
-	}
 	return ret;
 }
-EXPORT_SYMBOL(blkdev_issue_zeroout);
+EXPORT_SYMBOL(blkdev_issue_secure_erase);
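
Note on bio_chain_and_submit(): the new submission loops depend on this helper, which is defined outside this file (in the core bio code, not shown in this diff). Its behavior can be inferred from the chain-and-submit pattern of the removed blk_next_bio() above: submit the previously built bio, if any, and hand back the new one as the next chain anchor. A minimal sketch consistent with its use here, for orientation only:

struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new)
{
	/* Chain the previous bio to the new one and send it on its way. */
	if (prev) {
		bio_chain(prev, new);
		submit_bio(prev);
	}
	/* The caller stores the return value as the new chain anchor. */
	return new;
}

This is why __blkdev_issue_discard() and the zeroing helpers can drop their explicit chaining logic: each iteration just replaces *biop with the bio returned by the helper, and the caller waits on the final anchor with submit_bio_wait().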

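For callers, the visible API changes are the dropped flags argument of blkdev_issue_discard(), the removal of the write-same helpers, and the dedicated blkdev_issue_secure_erase() export that replaces the old BLKDEV_DISCARD_SECURE flag. A hypothetical caller sketch (the example_* functions are illustrative and not part of this commit; the blkdev_* signatures match the diff above):

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Discard a range; secure erase is now its own helper, not a discard flag. */
static int example_trim_range(struct block_device *bdev, sector_t start,
			      sector_t len, bool secure)
{
	if (secure)
		return blkdev_issue_secure_erase(bdev, start, len, GFP_KERNEL);
	return blkdev_issue_discard(bdev, start, len, GFP_KERNEL);
}

/* Zero a range, failing instead of falling back to writing zero pages. */
static int example_zero_range(struct block_device *bdev, sector_t start,
			      sector_t len)
{
	return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL,
				    BLKDEV_ZERO_NOFALLBACK);
}

As the checks at the top of blkdev_issue_zeroout() and blkdev_issue_secure_erase() show, start and len must still be aligned to the logical block size, otherwise the helpers return -EINVAL.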