summaryrefslogtreecommitdiff
path: root/block/blk-merge.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/blk-merge.c')
-rw-r--r--block/blk-merge.c363
1 files changed, 126 insertions, 237 deletions
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e0b28e9298c9..d3115d7469df 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -7,7 +7,6 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
-#include <linux/scatterlist.h>
#include <linux/part_stat.h>
#include <linux/blk-cgroup.h>
@@ -105,34 +104,58 @@ static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
}
+/*
+ * bio_submit_split_bioset - Submit a bio, splitting it at a designated sector
+ * @bio: the original bio to be submitted and split
+ * @split_sectors: the sector count at which to split
+ * @bs: the bio set used for allocating the new split bio
+ *
+ * The original bio is modified to contain the remaining sectors and submitted.
+ * The caller is responsible for submitting the returned bio.
+ *
+ * If succeed, the newly allocated bio representing the initial part will be
+ * returned, on failure NULL will be returned and original bio will fail.
+ */
+struct bio *bio_submit_split_bioset(struct bio *bio, unsigned int split_sectors,
+ struct bio_set *bs)
+{
+ struct bio *split = bio_split(bio, split_sectors, GFP_NOIO, bs);
+
+ if (IS_ERR(split)) {
+ bio->bi_status = errno_to_blk_status(PTR_ERR(split));
+ bio_endio(bio);
+ return NULL;
+ }
+
+ bio_chain(split, bio);
+ trace_block_split(split, bio->bi_iter.bi_sector);
+ WARN_ON_ONCE(bio_zone_write_plugging(bio));
+
+ if (should_fail_bio(bio))
+ bio_io_error(bio);
+ else if (!blk_throtl_bio(bio))
+ submit_bio_noacct_nocheck(bio, true);
+
+ return split;
+}
+EXPORT_SYMBOL_GPL(bio_submit_split_bioset);
+
static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
{
- if (unlikely(split_sectors < 0))
- goto error;
+ if (unlikely(split_sectors < 0)) {
+ bio->bi_status = errno_to_blk_status(split_sectors);
+ bio_endio(bio);
+ return NULL;
+ }
if (split_sectors) {
- struct bio *split;
-
- split = bio_split(bio, split_sectors, GFP_NOIO,
+ bio = bio_submit_split_bioset(bio, split_sectors,
&bio->bi_bdev->bd_disk->bio_split);
- if (IS_ERR(split)) {
- split_sectors = PTR_ERR(split);
- goto error;
- }
- split->bi_opf |= REQ_NOMERGE;
- blkcg_bio_issue_init(split);
- bio_chain(split, bio);
- trace_block_split(split, bio->bi_iter.bi_sector);
- WARN_ON_ONCE(bio_zone_write_plugging(bio));
- submit_bio_noacct(bio);
- return split;
+ if (bio)
+ bio->bi_opf |= REQ_NOMERGE;
}
return bio;
-error:
- bio->bi_status = errno_to_blk_status(split_sectors);
- bio_endio(bio);
- return NULL;
}
struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
@@ -226,27 +249,6 @@ static inline unsigned get_max_io_size(struct bio *bio,
}
/**
- * get_max_segment_size() - maximum number of bytes to add as a single segment
- * @lim: Request queue limits.
- * @paddr: address of the range to add
- * @len: maximum length available to add at @paddr
- *
- * Returns the maximum number of bytes of the range starting at @paddr that can
- * be added to a single segment.
- */
-static inline unsigned get_max_segment_size(const struct queue_limits *lim,
- phys_addr_t paddr, unsigned int len)
-{
- /*
- * Prevent an overflow if mask = ULONG_MAX and offset = 0 by adding 1
- * after having calculated the minimum.
- */
- return min_t(unsigned long, len,
- min(lim->seg_boundary_mask - (lim->seg_boundary_mask & paddr),
- (unsigned long)lim->max_segment_size - 1) + 1);
-}
-
-/**
* bvec_split_segs - verify whether or not a bvec should be split in the middle
* @lim: [in] queue limits to split based on
* @bv: [in] bvec to examine
@@ -270,7 +272,7 @@ static bool bvec_split_segs(const struct queue_limits *lim,
const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
unsigned max_segs, unsigned max_bytes)
{
- unsigned max_len = min(max_bytes, UINT_MAX) - *bytes;
+ unsigned max_len = max_bytes - *bytes;
unsigned len = min(bv->bv_len, max_len);
unsigned total_len = 0;
unsigned seg_size = 0;
@@ -300,36 +302,50 @@ static unsigned int bio_split_alignment(struct bio *bio,
return lim->logical_block_size;
}
+static inline unsigned int bvec_seg_gap(struct bio_vec *bvprv,
+ struct bio_vec *bv)
+{
+ return bv->bv_offset | (bvprv->bv_offset + bvprv->bv_len);
+}
+
/**
- * bio_split_rw_at - check if and where to split a read/write bio
+ * bio_split_io_at - check if and where to split a bio
* @bio: [in] bio to be split
* @lim: [in] queue limits to split based on
* @segs: [out] number of segments in the bio with the first half of the sectors
* @max_bytes: [in] maximum number of bytes per bio
+ * @len_align_mask: [in] length alignment mask for each vector
*
* Find out if @bio needs to be split to fit the queue limits in @lim and a
* maximum size of @max_bytes. Returns a negative error number if @bio can't be
* split, 0 if the bio doesn't have to be split, or a positive sector offset if
* @bio needs to be split.
*/
-int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
- unsigned *segs, unsigned max_bytes)
+int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
+ unsigned *segs, unsigned max_bytes, unsigned len_align_mask)
{
struct bio_vec bv, bvprv, *bvprvp = NULL;
+ unsigned nsegs = 0, bytes = 0, gaps = 0;
struct bvec_iter iter;
- unsigned nsegs = 0, bytes = 0;
bio_for_each_bvec(bv, bio, iter) {
+ if (bv.bv_offset & lim->dma_alignment ||
+ bv.bv_len & len_align_mask)
+ return -EINVAL;
+
/*
* If the queue doesn't support SG gaps and adding this
* offset would create a gap, disallow it.
*/
- if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
- goto split;
+ if (bvprvp) {
+ if (bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
+ goto split;
+ gaps |= bvec_seg_gap(bvprvp, &bv);
+ }
if (nsegs < lim->max_segments &&
bytes + bv.bv_len <= max_bytes &&
- bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
+ bv.bv_offset + bv.bv_len <= lim->max_fast_segment_size) {
nsegs++;
bytes += bv.bv_len;
} else {
@@ -343,6 +359,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
}
*segs = nsegs;
+ bio->bi_bvec_gap_bit = ffs(gaps);
return 0;
split:
if (bio->bi_opf & REQ_ATOMIC)
@@ -361,8 +378,16 @@ split:
* Individual bvecs might not be logical block aligned. Round down the
* split size so that each bio is properly block size aligned, even if
* we do not use the full hardware limits.
+ *
+ * It is possible to submit a bio that can't be split into a valid io:
+ * there may either be too many discontiguous vectors for the max
+ * segments limit, or contain virtual boundary gaps without having a
+ * valid block sized split. A zero byte result means one of those
+ * conditions occured.
*/
bytes = ALIGN_DOWN(bytes, bio_split_alignment(bio, lim));
+ if (!bytes)
+ return -EINVAL;
/*
* Bio splitting may cause subtle trouble such as hang when doing sync
@@ -370,9 +395,10 @@ split:
* big IO can be trival, disable iopoll when split needed.
*/
bio_clear_polled(bio);
+ bio->bi_bvec_gap_bit = ffs(gaps);
return bytes >> SECTOR_SHIFT;
}
-EXPORT_SYMBOL_GPL(bio_split_rw_at);
+EXPORT_SYMBOL_GPL(bio_split_io_at);
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
unsigned *nr_segs)
@@ -473,151 +499,6 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
return nr_phys_segs;
}
-static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
- struct scatterlist *sglist)
-{
- if (!*sg)
- return sglist;
-
- /*
- * If the driver previously mapped a shorter list, we could see a
- * termination bit prematurely unless it fully inits the sg table
- * on each mapping. We KNOW that there must be more entries here
- * or the driver would be buggy, so force clear the termination bit
- * to avoid doing a full sg_init_table() in drivers for each command.
- */
- sg_unmark_end(*sg);
- return sg_next(*sg);
-}
-
-static unsigned blk_bvec_map_sg(struct request_queue *q,
- struct bio_vec *bvec, struct scatterlist *sglist,
- struct scatterlist **sg)
-{
- unsigned nbytes = bvec->bv_len;
- unsigned nsegs = 0, total = 0;
-
- while (nbytes > 0) {
- unsigned offset = bvec->bv_offset + total;
- unsigned len = get_max_segment_size(&q->limits,
- bvec_phys(bvec) + total, nbytes);
- struct page *page = bvec->bv_page;
-
- /*
- * Unfortunately a fair number of drivers barf on scatterlists
- * that have an offset larger than PAGE_SIZE, despite other
- * subsystems dealing with that invariant just fine. For now
- * stick to the legacy format where we never present those from
- * the block layer, but the code below should be removed once
- * these offenders (mostly MMC/SD drivers) are fixed.
- */
- page += (offset >> PAGE_SHIFT);
- offset &= ~PAGE_MASK;
-
- *sg = blk_next_sg(sg, sglist);
- sg_set_page(*sg, page, len, offset);
-
- total += len;
- nbytes -= len;
- nsegs++;
- }
-
- return nsegs;
-}
-
-static inline int __blk_bvec_map_sg(struct bio_vec bv,
- struct scatterlist *sglist, struct scatterlist **sg)
-{
- *sg = blk_next_sg(sg, sglist);
- sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
- return 1;
-}
-
-/* only try to merge bvecs into one sg if they are from two bios */
-static inline bool
-__blk_segment_map_sg_merge(struct request_queue *q, struct bio_vec *bvec,
- struct bio_vec *bvprv, struct scatterlist **sg)
-{
-
- int nbytes = bvec->bv_len;
-
- if (!*sg)
- return false;
-
- if ((*sg)->length + nbytes > queue_max_segment_size(q))
- return false;
-
- if (!biovec_phys_mergeable(q, bvprv, bvec))
- return false;
-
- (*sg)->length += nbytes;
-
- return true;
-}
-
-static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
- struct scatterlist *sglist,
- struct scatterlist **sg)
-{
- struct bio_vec bvec, bvprv = { NULL };
- struct bvec_iter iter;
- int nsegs = 0;
- bool new_bio = false;
-
- for_each_bio(bio) {
- bio_for_each_bvec(bvec, bio, iter) {
- /*
- * Only try to merge bvecs from two bios given we
- * have done bio internal merge when adding pages
- * to bio
- */
- if (new_bio &&
- __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg))
- goto next_bvec;
-
- if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE)
- nsegs += __blk_bvec_map_sg(bvec, sglist, sg);
- else
- nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg);
- next_bvec:
- new_bio = false;
- }
- if (likely(bio->bi_iter.bi_size)) {
- bvprv = bvec;
- new_bio = true;
- }
- }
-
- return nsegs;
-}
-
-/*
- * map a request to scatterlist, return number of sg entries setup. Caller
- * must make sure sg can hold rq->nr_phys_segments entries
- */
-int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
- struct scatterlist *sglist, struct scatterlist **last_sg)
-{
- int nsegs = 0;
-
- if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
- nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg);
- else if (rq->bio)
- nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg);
-
- if (*last_sg)
- sg_mark_end(*last_sg);
-
- /*
- * Something must have been wrong if the figured number of
- * segment is bigger than number of req's physical segments
- */
- WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
-
- return nsegs;
-}
-EXPORT_SYMBOL(__blk_rq_map_sg);
-
static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
sector_t offset)
{
@@ -851,6 +732,24 @@ static bool blk_atomic_write_mergeable_rqs(struct request *rq,
return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
}
+u8 bio_seg_gap(struct request_queue *q, struct bio *prev, struct bio *next,
+ u8 gaps_bit)
+{
+ struct bio_vec pb, nb;
+
+ if (!bio_has_data(prev))
+ return 0;
+
+ gaps_bit = min_not_zero(gaps_bit, prev->bi_bvec_gap_bit);
+ gaps_bit = min_not_zero(gaps_bit, next->bi_bvec_gap_bit);
+
+ bio_get_last_bvec(prev, &pb);
+ bio_get_first_bvec(next, &nb);
+ if (!biovec_phys_mergeable(q, &pb, &nb))
+ gaps_bit = min_not_zero(gaps_bit, ffs(bvec_seg_gap(&pb, &nb)));
+ return gaps_bit;
+}
+
/*
* For non-mq, this has to be called with the request spinlock acquired.
* For mq with scheduling, the appropriate queue wide lock should be held.
@@ -864,17 +763,12 @@ static struct request *attempt_merge(struct request_queue *q,
if (req_op(req) != req_op(next))
return NULL;
- if (rq_data_dir(req) != rq_data_dir(next))
+ if (req->bio->bi_write_hint != next->bio->bi_write_hint)
+ return NULL;
+ if (req->bio->bi_write_stream != next->bio->bi_write_stream)
+ return NULL;
+ if (req->bio->bi_ioprio != next->bio->bi_ioprio)
return NULL;
-
- if (req->bio && next->bio) {
- /* Don't merge requests with different write hints. */
- if (req->bio->bi_write_hint != next->bio->bi_write_hint)
- return NULL;
- if (req->bio->bi_ioprio != next->bio->bi_ioprio)
- return NULL;
- }
-
if (!blk_atomic_write_mergeable_rqs(req, next))
return NULL;
@@ -920,6 +814,9 @@ static struct request *attempt_merge(struct request_queue *q,
if (next->start_time_ns < req->start_time_ns)
req->start_time_ns = next->start_time_ns;
+ req->phys_gap_bit = bio_seg_gap(req->q, req->biotail, next->bio,
+ min_not_zero(next->phys_gap_bit,
+ req->phys_gap_bit));
req->biotail->bi_next = next->bio;
req->biotail = next->biotail;
@@ -986,30 +883,18 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (req_op(rq) != bio_op(bio))
return false;
- /* different data direction or already started, don't merge */
- if (bio_data_dir(bio) != rq_data_dir(rq))
- return false;
-
- /* don't merge across cgroup boundaries */
if (!blk_cgroup_mergeable(rq, bio))
return false;
-
- /* only merge integrity protected bio into ditto rq */
if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
return false;
-
- /* Only merge if the crypt contexts are compatible */
if (!bio_crypt_rq_ctx_compatible(rq, bio))
return false;
-
- if (rq->bio) {
- /* Don't merge requests with different write hints. */
- if (rq->bio->bi_write_hint != bio->bi_write_hint)
- return false;
- if (rq->bio->bi_ioprio != bio->bi_ioprio)
- return false;
- }
-
+ if (rq->bio->bi_write_hint != bio->bi_write_hint)
+ return false;
+ if (rq->bio->bi_write_stream != bio->bi_write_stream)
+ return false;
+ if (rq->bio->bi_ioprio != bio->bi_ioprio)
+ return false;
if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
return false;
@@ -1055,6 +940,8 @@ enum bio_merge_status bio_attempt_back_merge(struct request *req,
if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
blk_zone_write_plug_bio_merged(bio);
+ req->phys_gap_bit = bio_seg_gap(req->q, req->biotail, bio,
+ req->phys_gap_bit);
req->biotail->bi_next = bio;
req->biotail = bio;
req->__data_len += bio->bi_iter.bi_size;
@@ -1089,6 +976,8 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req,
blk_update_mixed_merge(req, bio, true);
+ req->phys_gap_bit = bio_seg_gap(req->q, bio, req->bio,
+ req->phys_gap_bit);
bio->bi_next = req->bio;
req->bio = bio;
@@ -1182,20 +1071,20 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
if (!plug || rq_list_empty(&plug->mq_list))
return false;
- rq_list_for_each(&plug->mq_list, rq) {
- if (rq->q == q) {
- if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
- BIO_MERGE_OK)
- return true;
- break;
- }
+ rq = plug->mq_list.tail;
+ if (rq->q == q)
+ return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
+ BIO_MERGE_OK;
+ else if (!plug->multiple_queues)
+ return false;
- /*
- * Only keep iterating plug list for merges if we have multiple
- * queues
- */
- if (!plug->multiple_queues)
- break;
+ rq_list_for_each(&plug->mq_list, rq) {
+ if (rq->q != q)
+ continue;
+ if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
+ BIO_MERGE_OK)
+ return true;
+ break;
}
return false;
}