Diffstat (limited to 'block/blk-merge.c')
| -rw-r--r-- | block/blk-merge.c | 363 |
1 file changed, 126 insertions, 237 deletions
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e0b28e9298c9..d3115d7469df 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -7,7 +7,6 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/blk-integrity.h>
-#include <linux/scatterlist.h>
 #include <linux/part_stat.h>
 #include <linux/blk-cgroup.h>
 
@@ -105,34 +104,58 @@ static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
 	return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
 }
 
+/*
+ * bio_submit_split_bioset - Submit a bio, splitting it at a designated sector
+ * @bio: the original bio to be submitted and split
+ * @split_sectors: the sector count at which to split
+ * @bs: the bio set used for allocating the new split bio
+ *
+ * The original bio is modified to contain the remaining sectors and submitted.
+ * The caller is responsible for submitting the returned bio.
+ *
+ * On success, the newly allocated bio representing the initial part is
+ * returned; on failure, NULL is returned and the original bio is failed.
+ */
+struct bio *bio_submit_split_bioset(struct bio *bio, unsigned int split_sectors,
+				    struct bio_set *bs)
+{
+	struct bio *split = bio_split(bio, split_sectors, GFP_NOIO, bs);
+
+	if (IS_ERR(split)) {
+		bio->bi_status = errno_to_blk_status(PTR_ERR(split));
+		bio_endio(bio);
+		return NULL;
+	}
+
+	bio_chain(split, bio);
+	trace_block_split(split, bio->bi_iter.bi_sector);
+	WARN_ON_ONCE(bio_zone_write_plugging(bio));
+
+	if (should_fail_bio(bio))
+		bio_io_error(bio);
+	else if (!blk_throtl_bio(bio))
+		submit_bio_noacct_nocheck(bio, true);
+
+	return split;
+}
+EXPORT_SYMBOL_GPL(bio_submit_split_bioset);
+
 static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
 {
-	if (unlikely(split_sectors < 0))
-		goto error;
+	if (unlikely(split_sectors < 0)) {
+		bio->bi_status = errno_to_blk_status(split_sectors);
+		bio_endio(bio);
+		return NULL;
+	}
 
 	if (split_sectors) {
-		struct bio *split;
-
-		split = bio_split(bio, split_sectors, GFP_NOIO,
+		bio = bio_submit_split_bioset(bio, split_sectors,
 				&bio->bi_bdev->bd_disk->bio_split);
-		if (IS_ERR(split)) {
-			split_sectors = PTR_ERR(split);
-			goto error;
-		}
-		split->bi_opf |= REQ_NOMERGE;
-		blkcg_bio_issue_init(split);
-		bio_chain(split, bio);
-		trace_block_split(split, bio->bi_iter.bi_sector);
-		WARN_ON_ONCE(bio_zone_write_plugging(bio));
-		submit_bio_noacct(bio);
-		return split;
+		if (bio)
+			bio->bi_opf |= REQ_NOMERGE;
 	}
 
 	return bio;
-error:
-	bio->bi_status = errno_to_blk_status(split_sectors);
-	bio_endio(bio);
-	return NULL;
 }
 
 struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
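[Review note] bio_submit_split_bioset() is the new export of interest here: it lets a caller split on its own bio_set rather than the disk's bio_split set, while the helper chains and submits the remainder internally. A minimal caller sketch, assuming a hypothetical driver-owned bio_set; my_bs and my_split_and_submit are not part of this patch:

	#include <linux/bio.h>

	/*
	 * Sketch only: split off the first `boundary` sectors of @bio on a
	 * driver-private bio_set. bio_submit_split_bioset() submits the
	 * remainder itself; the caller owns only the returned front part.
	 * On NULL the original bio has already been failed with bi_status
	 * set, so there is nothing left for the caller to do.
	 */
	static void my_split_and_submit(struct bio *bio, unsigned int boundary,
					struct bio_set *my_bs)
	{
		struct bio *front = bio_submit_split_bioset(bio, boundary, my_bs);

		if (front)
			submit_bio_noacct(front);
	}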
@@ -226,27 +249,6 @@ static inline unsigned get_max_io_size(struct bio *bio,
 }
 
 /**
- * get_max_segment_size() - maximum number of bytes to add as a single segment
- * @lim: Request queue limits.
- * @paddr: address of the range to add
- * @len: maximum length available to add at @paddr
- *
- * Returns the maximum number of bytes of the range starting at @paddr that can
- * be added to a single segment.
- */
-static inline unsigned get_max_segment_size(const struct queue_limits *lim,
-		phys_addr_t paddr, unsigned int len)
-{
-	/*
-	 * Prevent an overflow if mask = ULONG_MAX and offset = 0 by adding 1
-	 * after having calculated the minimum.
-	 */
-	return min_t(unsigned long, len,
-		min(lim->seg_boundary_mask - (lim->seg_boundary_mask & paddr),
-		    (unsigned long)lim->max_segment_size - 1) + 1);
-}
-
-/**
  * bvec_split_segs - verify whether or not a bvec should be split in the middle
  * @lim:  [in] queue limits to split based on
  * @bv:   [in] bvec to examine
@@ -270,7 +272,7 @@ static bool bvec_split_segs(const struct queue_limits *lim,
 		const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
 		unsigned max_segs, unsigned max_bytes)
 {
-	unsigned max_len = min(max_bytes, UINT_MAX) - *bytes;
+	unsigned max_len = max_bytes - *bytes;
 	unsigned len = min(bv->bv_len, max_len);
 	unsigned total_len = 0;
 	unsigned seg_size = 0;
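[Review note] An aside on the just-removed get_max_segment_size(): the "- 1) + 1" construction it carried is worth keeping in mind wherever this logic reappears. A standalone illustration of the trick, not taken from the patch:

	#include <linux/minmax.h>

	/*
	 * If boundary_mask == ULONG_MAX and paddr is boundary aligned, the
	 * distance to the next boundary is ULONG_MAX + 1, which wraps to 0
	 * if computed directly. Comparing "allowed bytes - 1" values and
	 * adding 1 only after taking the min keeps every intermediate
	 * value representable in an unsigned long.
	 */
	static unsigned long span_to_boundary(unsigned long boundary_mask,
					      unsigned long paddr,
					      unsigned long max_seg_size)
	{
		return min(boundary_mask - (paddr & boundary_mask),
			   max_seg_size - 1) + 1;
	}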
@@ -300,36 +302,50 @@ static unsigned int bio_split_alignment(struct bio *bio,
 	return lim->logical_block_size;
 }
 
+static inline unsigned int bvec_seg_gap(struct bio_vec *bvprv,
+					struct bio_vec *bv)
+{
+	return bv->bv_offset | (bvprv->bv_offset + bvprv->bv_len);
+}
+
 /**
- * bio_split_rw_at - check if and where to split a read/write bio
+ * bio_split_io_at - check if and where to split a bio
  * @bio:  [in] bio to be split
  * @lim:  [in] queue limits to split based on
  * @segs: [out] number of segments in the bio with the first half of the sectors
  * @max_bytes: [in] maximum number of bytes per bio
+ * @len_align_mask: [in] length alignment mask for each vector
  *
  * Find out if @bio needs to be split to fit the queue limits in @lim and a
  * maximum size of @max_bytes. Returns a negative error number if @bio can't be
  * split, 0 if the bio doesn't have to be split, or a positive sector offset if
  * @bio needs to be split.
  */
-int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
-		unsigned *segs, unsigned max_bytes)
+int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
+		unsigned *segs, unsigned max_bytes, unsigned len_align_mask)
 {
 	struct bio_vec bv, bvprv, *bvprvp = NULL;
+	unsigned nsegs = 0, bytes = 0, gaps = 0;
 	struct bvec_iter iter;
-	unsigned nsegs = 0, bytes = 0;
 
 	bio_for_each_bvec(bv, bio, iter) {
+		if (bv.bv_offset & lim->dma_alignment ||
+		    bv.bv_len & len_align_mask)
+			return -EINVAL;
+
 		/*
 		 * If the queue doesn't support SG gaps and adding this
 		 * offset would create a gap, disallow it.
 		 */
-		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
-			goto split;
+		if (bvprvp) {
+			if (bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
+				goto split;
+			gaps |= bvec_seg_gap(bvprvp, &bv);
+		}
 
 		if (nsegs < lim->max_segments &&
 		    bytes + bv.bv_len <= max_bytes &&
-		    bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
+		    bv.bv_offset + bv.bv_len <= lim->max_fast_segment_size) {
 			nsegs++;
 			bytes += bv.bv_len;
 		} else {
@@ -343,6 +359,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
 	}
 
 	*segs = nsegs;
+	bio->bi_bvec_gap_bit = ffs(gaps);
 	return 0;
split:
 	if (bio->bi_opf & REQ_ATOMIC)
@@ -361,8 +378,16 @@ split:
 	 * Individual bvecs might not be logical block aligned. Round down the
 	 * split size so that each bio is properly block size aligned, even if
 	 * we do not use the full hardware limits.
+	 *
+	 * It is possible to submit a bio that can't be split into a valid io:
+	 * there may either be too many discontiguous vectors for the max
+	 * segments limit, or contain virtual boundary gaps without having a
+	 * valid block sized split. A zero byte result means one of those
+	 * conditions occurred.
 	 */
 	bytes = ALIGN_DOWN(bytes, bio_split_alignment(bio, lim));
+	if (!bytes)
+		return -EINVAL;
 
 	/*
 	 * Bio splitting may cause subtle trouble such as hang when doing sync
@@ -370,9 +395,10 @@ split:
 	 * big IO can be trival, disable iopoll when split needed.
 	 */
 	bio_clear_polled(bio);
+	bio->bi_bvec_gap_bit = ffs(gaps);
 	return bytes >> SECTOR_SHIFT;
 }
-EXPORT_SYMBOL_GPL(bio_split_rw_at);
+EXPORT_SYMBOL_GPL(bio_split_io_at);
 
 struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
 		unsigned *nr_segs)
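[Review note] The gap bookkeeping above is compact: for every adjacent bvec pair, bvec_seg_gap() ORs the start offset of the next vector with the end offset of the previous one, and ffs() of the accumulated value is what lands in bi_bvec_gap_bit. A worked example with hypothetical values, not from the patch:

	#include <linux/bitops.h>	/* ffs() */

	/* Toy mirror of bvec_seg_gap() over two raw offsets. */
	static unsigned int seg_gap_bits(unsigned int prev_end,
					 unsigned int next_start)
	{
		return next_start | prev_end;
	}

	/*
	 * Example: the previous vector ends at in-page offset 0x600 and the
	 * next one starts at 0x200. seg_gap_bits() yields 0x600, and
	 * ffs(0x600) == 10: every boundary seen so far is 512-byte aligned
	 * (bit 9 set) but at least one is not 1024-byte aligned. Since
	 * ffs(0) == 0, a stored gap bit of 0 means no segment boundary
	 * broke any power-of-two alignment at all.
	 */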
@@ -473,151 +499,6 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
 	return nr_phys_segs;
 }
 
-static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
-		struct scatterlist *sglist)
-{
-	if (!*sg)
-		return sglist;
-
-	/*
-	 * If the driver previously mapped a shorter list, we could see a
-	 * termination bit prematurely unless it fully inits the sg table
-	 * on each mapping. We KNOW that there must be more entries here
-	 * or the driver would be buggy, so force clear the termination bit
-	 * to avoid doing a full sg_init_table() in drivers for each command.
-	 */
-	sg_unmark_end(*sg);
-	return sg_next(*sg);
-}
-
-static unsigned blk_bvec_map_sg(struct request_queue *q,
-		struct bio_vec *bvec, struct scatterlist *sglist,
-		struct scatterlist **sg)
-{
-	unsigned nbytes = bvec->bv_len;
-	unsigned nsegs = 0, total = 0;
-
-	while (nbytes > 0) {
-		unsigned offset = bvec->bv_offset + total;
-		unsigned len = get_max_segment_size(&q->limits,
-				bvec_phys(bvec) + total, nbytes);
-		struct page *page = bvec->bv_page;
-
-		/*
-		 * Unfortunately a fair number of drivers barf on scatterlists
-		 * that have an offset larger than PAGE_SIZE, despite other
-		 * subsystems dealing with that invariant just fine. For now
-		 * stick to the legacy format where we never present those from
-		 * the block layer, but the code below should be removed once
-		 * these offenders (mostly MMC/SD drivers) are fixed.
-		 */
-		page += (offset >> PAGE_SHIFT);
-		offset &= ~PAGE_MASK;
-
-		*sg = blk_next_sg(sg, sglist);
-		sg_set_page(*sg, page, len, offset);
-
-		total += len;
-		nbytes -= len;
-		nsegs++;
-	}
-
-	return nsegs;
-}
-
-static inline int __blk_bvec_map_sg(struct bio_vec bv,
-		struct scatterlist *sglist, struct scatterlist **sg)
-{
-	*sg = blk_next_sg(sg, sglist);
-	sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
-	return 1;
-}
-
-/* only try to merge bvecs into one sg if they are from two bios */
-static inline bool
-__blk_segment_map_sg_merge(struct request_queue *q, struct bio_vec *bvec,
-			   struct bio_vec *bvprv, struct scatterlist **sg)
-{
-
-	int nbytes = bvec->bv_len;
-
-	if (!*sg)
-		return false;
-
-	if ((*sg)->length + nbytes > queue_max_segment_size(q))
-		return false;
-
-	if (!biovec_phys_mergeable(q, bvprv, bvec))
-		return false;
-
-	(*sg)->length += nbytes;
-
-	return true;
-}
-
-static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
-			     struct scatterlist *sglist,
-			     struct scatterlist **sg)
-{
-	struct bio_vec bvec, bvprv = { NULL };
-	struct bvec_iter iter;
-	int nsegs = 0;
-	bool new_bio = false;
-
-	for_each_bio(bio) {
-		bio_for_each_bvec(bvec, bio, iter) {
-			/*
-			 * Only try to merge bvecs from two bios given we
-			 * have done bio internal merge when adding pages
-			 * to bio
-			 */
-			if (new_bio &&
-			    __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg))
-				goto next_bvec;
-
-			if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE)
-				nsegs += __blk_bvec_map_sg(bvec, sglist, sg);
-			else
-				nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg);
- next_bvec:
-			new_bio = false;
-		}
-		if (likely(bio->bi_iter.bi_size)) {
-			bvprv = bvec;
-			new_bio = true;
-		}
-	}
-
-	return nsegs;
-}
-
-/*
- * map a request to scatterlist, return number of sg entries setup. Caller
- * must make sure sg can hold rq->nr_phys_segments entries
- */
-int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
-		struct scatterlist *sglist, struct scatterlist **last_sg)
-{
-	int nsegs = 0;
-
-	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
-		nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg);
-	else if (rq->bio)
-		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg);
-
-	if (*last_sg)
-		sg_mark_end(*last_sg);
-
-	/*
-	 * Something must have been wrong if the figured number of
-	 * segment is bigger than number of req's physical segments
-	 */
-	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
-
-	return nsegs;
-}
-EXPORT_SYMBOL(__blk_rq_map_sg);
-
 static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
 						  sector_t offset)
 {
@@ -851,6 +732,24 @@ static bool blk_atomic_write_mergeable_rqs(struct request *rq,
 	return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
 }
 
+u8 bio_seg_gap(struct request_queue *q, struct bio *prev, struct bio *next,
+		u8 gaps_bit)
+{
+	struct bio_vec pb, nb;
+
+	if (!bio_has_data(prev))
+		return 0;
+
+	gaps_bit = min_not_zero(gaps_bit, prev->bi_bvec_gap_bit);
+	gaps_bit = min_not_zero(gaps_bit, next->bi_bvec_gap_bit);
+
+	bio_get_last_bvec(prev, &pb);
+	bio_get_first_bvec(next, &nb);
+	if (!biovec_phys_mergeable(q, &pb, &nb))
+		gaps_bit = min_not_zero(gaps_bit, ffs(bvec_seg_gap(&pb, &nb)));
+	return gaps_bit;
+}
+
 /*
  * For non-mq, this has to be called with the request spinlock acquired.
  * For mq with scheduling, the appropriate queue wide lock should be held.
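[Review note] bio_seg_gap() folds the per-bio gap bits with min_not_zero(), so a merged request always keeps the lowest non-zero bit, i.e. the strictest alignment known to be violated, including at the new seam between the two bios. A semantics sketch, not patch code:

	#include <linux/types.h>
	#include <linux/minmax.h>

	/*
	 * Fold two 1-based ffs()-style gap bits, where 0 means "no gap
	 * recorded". Equivalent to min_not_zero(a, b), spelled out.
	 */
	static u8 fold_gap_bit(u8 a, u8 b)
	{
		if (!a)
			return b;	/* 0 never wins: it encodes "no gap" */
		if (!b)
			return a;
		return min(a, b);	/* lowest violated bit is binding */
	}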
@@ -864,17 +763,12 @@ static struct request *attempt_merge(struct request_queue *q,
 	if (req_op(req) != req_op(next))
 		return NULL;
 
-	if (rq_data_dir(req) != rq_data_dir(next))
+	if (req->bio->bi_write_hint != next->bio->bi_write_hint)
+		return NULL;
+	if (req->bio->bi_write_stream != next->bio->bi_write_stream)
+		return NULL;
+	if (req->bio->bi_ioprio != next->bio->bi_ioprio)
 		return NULL;
-
-	if (req->bio && next->bio) {
-		/* Don't merge requests with different write hints. */
-		if (req->bio->bi_write_hint != next->bio->bi_write_hint)
-			return NULL;
-		if (req->bio->bi_ioprio != next->bio->bi_ioprio)
-			return NULL;
-	}
-
 	if (!blk_atomic_write_mergeable_rqs(req, next))
 		return NULL;
 
@@ -920,6 +814,9 @@ static struct request *attempt_merge(struct request_queue *q,
 	if (next->start_time_ns < req->start_time_ns)
 		req->start_time_ns = next->start_time_ns;
 
+	req->phys_gap_bit = bio_seg_gap(req->q, req->biotail, next->bio,
+					min_not_zero(next->phys_gap_bit,
+						     req->phys_gap_bit));
 	req->biotail->bi_next = next->bio;
 	req->biotail = next->biotail;
 
@@ -986,30 +883,18 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 	if (req_op(rq) != bio_op(bio))
 		return false;
 
-	/* different data direction or already started, don't merge */
-	if (bio_data_dir(bio) != rq_data_dir(rq))
-		return false;
-
-	/* don't merge across cgroup boundaries */
 	if (!blk_cgroup_mergeable(rq, bio))
 		return false;
-
-	/* only merge integrity protected bio into ditto rq */
 	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
 		return false;
-
-	/* Only merge if the crypt contexts are compatible */
 	if (!bio_crypt_rq_ctx_compatible(rq, bio))
 		return false;
-
-	if (rq->bio) {
-		/* Don't merge requests with different write hints. */
-		if (rq->bio->bi_write_hint != bio->bi_write_hint)
-			return false;
-		if (rq->bio->bi_ioprio != bio->bi_ioprio)
-			return false;
-	}
-
+	if (rq->bio->bi_write_hint != bio->bi_write_hint)
+		return false;
+	if (rq->bio->bi_write_stream != bio->bi_write_stream)
+		return false;
+	if (rq->bio->bi_ioprio != bio->bi_ioprio)
+		return false;
 	if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
 		return false;
 
@@ -1055,6 +940,8 @@ enum bio_merge_status bio_attempt_back_merge(struct request *req,
 	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
 		blk_zone_write_plug_bio_merged(bio);
 
+	req->phys_gap_bit = bio_seg_gap(req->q, req->biotail, bio,
+					req->phys_gap_bit);
 	req->biotail->bi_next = bio;
 	req->biotail = bio;
 	req->__data_len += bio->bi_iter.bi_size;
@@ -1089,6 +976,8 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req,
 
 	blk_update_mixed_merge(req, bio, true);
 
+	req->phys_gap_bit = bio_seg_gap(req->q, bio, req->bio,
+					req->phys_gap_bit);
 	bio->bi_next = req->bio;
 	req->bio = bio;
 
@@ -1182,20 +1071,20 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
 	if (!plug || rq_list_empty(&plug->mq_list))
 		return false;
 
-	rq_list_for_each(&plug->mq_list, rq) {
-		if (rq->q == q) {
-			if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
-			    BIO_MERGE_OK)
-				return true;
-			break;
-		}
+	rq = plug->mq_list.tail;
+	if (rq->q == q)
+		return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
+			BIO_MERGE_OK;
+	else if (!plug->multiple_queues)
+		return false;
 
-		/*
-		 * Only keep iterating plug list for merges if we have multiple
-		 * queues
-		 */
-		if (!plug->multiple_queues)
-			break;
+	rq_list_for_each(&plug->mq_list, rq) {
+		if (rq->q != q)
+			continue;
+		if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
+		    BIO_MERGE_OK)
+			return true;
+		break;
 	}
 	return false;
 }
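[Review note] The blk_attempt_plug_merge() rework changes the lookup policy: the most recently plugged request is tried first and, for a single-queue plug, is the only candidate; only multi-queue plugs fall back to a list walk, and even then at most one same-queue request is attempted. A toy userspace model of the new order, all names hypothetical:

	#include <stddef.h>

	struct toy_rq { struct toy_rq *next; int qid; };

	/* Returns 1 where the real code would attempt a bio merge. */
	static int toy_plug_merge(struct toy_rq *head, struct toy_rq *tail,
				  int qid, int multiple_queues)
	{
		struct toy_rq *rq;

		if (tail->qid == qid)	/* most recent request first */
			return 1;	/* sole candidate for its queue */
		if (!multiple_queues)	/* single queue: tail was the only shot */
			return 0;
		for (rq = head; rq != NULL; rq = rq->next)
			if (rq->qid == qid)	/* first same-queue request */
				return 1;	/* is the one candidate tried */
		return 0;
	}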
