Diffstat (limited to 'include/linux/bio.h')
-rw-r--r--  include/linux/bio.h  846
1 file changed, 458 insertions, 388 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ec48bac5b039..ad2d57908c1c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -1,158 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- * 2.5 block I/O model
- *
  * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public Licens
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
  */
 #ifndef __LINUX_BIO_H
 #define __LINUX_BIO_H

-#include <linux/highmem.h>
 #include <linux/mempool.h>
-#include <linux/ioprio.h>
-#include <linux/bug.h>
+/* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
+#include <linux/blk_types.h>
+#include <linux/uio.h>

-#ifdef CONFIG_BLOCK
+#define BIO_MAX_VECS		256U
+#define BIO_MAX_INLINE_VECS	UIO_MAXIOV

-#include <asm/io.h>
+struct queue_limits;

-/* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
-#include <linux/blk_types.h>
+static inline unsigned int bio_max_segs(unsigned int nr_segs)
+{
+	return min(nr_segs, BIO_MAX_VECS);
+}
+
+#define bio_iter_iovec(bio, iter) \
+	bvec_iter_bvec((bio)->bi_io_vec, (iter))
+
+#define bio_iter_page(bio, iter) \
+	bvec_iter_page((bio)->bi_io_vec, (iter))
+#define bio_iter_len(bio, iter) \
+	bvec_iter_len((bio)->bi_io_vec, (iter))
+#define bio_iter_offset(bio, iter) \
+	bvec_iter_offset((bio)->bi_io_vec, (iter))

-#define BIO_DEBUG
+#define bio_page(bio)		bio_iter_page((bio), (bio)->bi_iter)
+#define bio_offset(bio)		bio_iter_offset((bio), (bio)->bi_iter)
+#define bio_iovec(bio)		bio_iter_iovec((bio), (bio)->bi_iter)

-#ifdef BIO_DEBUG
-#define BIO_BUG_ON	BUG_ON
-#else
-#define BIO_BUG_ON
-#endif
+#define bvec_iter_sectors(iter)	((iter).bi_size >> 9)
+#define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter)))

-#define BIO_MAX_PAGES		256
-#define BIO_MAX_SIZE		(BIO_MAX_PAGES << PAGE_CACHE_SHIFT)
-#define BIO_MAX_SECTORS		(BIO_MAX_SIZE >> 9)
+#define bio_sectors(bio)	bvec_iter_sectors((bio)->bi_iter)
+#define bio_end_sector(bio)	bvec_iter_end_sector((bio)->bi_iter)

 /*
- * upper 16 bits of bi_rw define the io priority of this bio
+ * Return the data direction, READ or WRITE.
  */
-#define BIO_PRIO_SHIFT	(8 * sizeof(unsigned long) - IOPRIO_BITS)
-#define bio_prio(bio)	((bio)->bi_rw >> BIO_PRIO_SHIFT)
-#define bio_prio_valid(bio)	ioprio_valid(bio_prio(bio))
-
-#define bio_set_prio(bio, prio)		do {			\
-	WARN_ON(prio >= (1 << IOPRIO_BITS));			\
-	(bio)->bi_rw &= ((1UL << BIO_PRIO_SHIFT) - 1);		\
-	(bio)->bi_rw |= ((unsigned long) (prio) << BIO_PRIO_SHIFT);	\
-} while (0)
+#define bio_data_dir(bio) \
+	(op_is_write(bio_op(bio)) ? WRITE : READ)

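Editor's aside: the accessors above are now derived from the bio's `bi_iter` rather than the old `bi_idx`/`bi_sector` fields. A minimal, illustrative sketch of reading them from driver code follows; it is not part of this patch and `my_inspect_bio` is a hypothetical name.

```c
/* Illustrative only; not part of this patch. */
#include <linux/bio.h>
#include <linux/printk.h>

static void my_inspect_bio(struct bio *bio)
{
	/* All of these read bio->bi_iter, so they reflect any prior advance. */
	pr_debug("%s: %u sectors at %llu..%llu\n",
		 bio_data_dir(bio) == WRITE ? "write" : "read",
		 bio_sectors(bio),
		 (unsigned long long)bio->bi_iter.bi_sector,
		 (unsigned long long)bio_end_sector(bio));
}
```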
 /*
- * various member access, note that bio_data should of course not be used
- * on highmem page vectors
+ * Check whether this bio carries any data or not. A NULL bio is allowed.
  */
-#define bio_iovec_idx(bio, idx)	(&((bio)->bi_io_vec[(idx)]))
-#define bio_iovec(bio)		bio_iovec_idx((bio), (bio)->bi_idx)
-#define bio_page(bio)		bio_iovec((bio))->bv_page
-#define bio_offset(bio)		bio_iovec((bio))->bv_offset
-#define bio_segments(bio)	((bio)->bi_vcnt - (bio)->bi_idx)
-#define bio_sectors(bio)	((bio)->bi_size >> 9)
-#define bio_end_sector(bio)	((bio)->bi_sector + bio_sectors((bio)))
+static inline bool bio_has_data(struct bio *bio)
+{
+	if (bio &&
+	    bio->bi_iter.bi_size &&
+	    bio_op(bio) != REQ_OP_DISCARD &&
+	    bio_op(bio) != REQ_OP_SECURE_ERASE &&
+	    bio_op(bio) != REQ_OP_WRITE_ZEROES)
+		return true;
+
+	return false;
+}

-static inline unsigned int bio_cur_bytes(struct bio *bio)
+static inline bool bio_no_advance_iter(const struct bio *bio)
 {
-	if (bio->bi_vcnt)
-		return bio_iovec(bio)->bv_len;
-	else /* dataless requests such as discard */
-		return bio->bi_size;
+	return bio_op(bio) == REQ_OP_DISCARD ||
+	       bio_op(bio) == REQ_OP_SECURE_ERASE ||
+	       bio_op(bio) == REQ_OP_WRITE_ZEROES;
 }

 static inline void *bio_data(struct bio *bio)
 {
-	if (bio->bi_vcnt)
+	if (bio_has_data(bio))
 		return page_address(bio_page(bio)) + bio_offset(bio);

 	return NULL;
 }

-/*
- * will die
- */
-#define bio_to_phys(bio)	(page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio)))
-#define bvec_to_phys(bv)	(page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
+static inline bool bio_next_segment(const struct bio *bio,
+				    struct bvec_iter_all *iter)
+{
+	if (iter->idx >= bio->bi_vcnt)
+		return false;
+
+	bvec_advance(&bio->bi_io_vec[iter->idx], iter);
+	return true;
+}

 /*
- * queues that have highmem support enabled may still need to revert to
- * PIO transfers occasionally and thus map high pages temporarily. For
- * permanent PIO fall back, user is probably better off disabling highmem
- * I/O completely on that queue (see ide-dma for example)
+ * drivers should _never_ use the all version - the bio may have been split
+ * before it got to the driver and the driver won't own all of it
  */
-#define __bio_kmap_atomic(bio, idx) \
-	(kmap_atomic(bio_iovec_idx((bio), (idx))->bv_page) + \
-	 bio_iovec_idx((bio), (idx))->bv_offset)
+#define bio_for_each_segment_all(bvl, bio, iter) \
+	for (bvl = bvec_init_iter_all(&iter); bio_next_segment((bio), &iter); )

-#define __bio_kunmap_atomic(addr) kunmap_atomic(addr)
+static inline void bio_advance_iter(const struct bio *bio,
+				    struct bvec_iter *iter, unsigned int bytes)
+{
+	iter->bi_sector += bytes >> 9;

-/*
- * merge helpers etc
- */
+	if (bio_no_advance_iter(bio))
+		iter->bi_size -= bytes;
+	else
+		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
+		/* TODO: It is reasonable to complete bio with error here. */
+}
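Editor's aside: a hedged sketch of the `bio_for_each_segment_all()` idiom introduced above, used by the code that owns the bio (per the comment, drivers must not use it on bios they did not build). `my_read_end_io` is a hypothetical completion handler.

```c
/* Illustrative completion handler for a bio its owner built and submitted. */
#include <linux/bio.h>
#include <linux/pagemap.h>

static void my_read_end_io(struct bio *bio)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	/* Visits every single-page segment the bio owns, ignoring bi_iter. */
	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;

		if (!bio->bi_status)
			SetPageUptodate(page);
		unlock_page(page);
	}
	bio_put(bio);
}
```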

-#define __BVEC_END(bio)		bio_iovec_idx((bio), (bio)->bi_vcnt - 1)
-#define __BVEC_START(bio)	bio_iovec_idx((bio), (bio)->bi_idx)
+/* @bytes should be less or equal to bvec[i->bi_idx].bv_len */
+static inline void bio_advance_iter_single(const struct bio *bio,
+					   struct bvec_iter *iter,
+					   unsigned int bytes)
+{
+	iter->bi_sector += bytes >> 9;

-/* Default implementation of BIOVEC_PHYS_MERGEABLE */
-#define __BIOVEC_PHYS_MERGEABLE(vec1, vec2)	\
-	((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
+	if (bio_no_advance_iter(bio))
+		iter->bi_size -= bytes;
+	else
+		bvec_iter_advance_single(bio->bi_io_vec, iter, bytes);
+}

-/*
- * allow arch override, for eg virtualized architectures (put in asm/io.h)
+void __bio_advance(struct bio *, unsigned bytes);
+
+/**
+ * bio_advance - increment/complete a bio by some number of bytes
+ * @bio:	bio to advance
+ * @nbytes:	number of bytes to complete
+ *
+ * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
+ * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
+ * be updated on the last bvec as well.
+ *
+ * @bio will then represent the remaining, uncompleted portion of the io.
  */
-#ifndef BIOVEC_PHYS_MERGEABLE
-#define BIOVEC_PHYS_MERGEABLE(vec1, vec2)	\
-	__BIOVEC_PHYS_MERGEABLE(vec1, vec2)
-#endif
+static inline void bio_advance(struct bio *bio, unsigned int nbytes)
+{
+	if (nbytes == bio->bi_iter.bi_size) {
+		bio->bi_iter.bi_size = 0;
+		return;
+	}
+	__bio_advance(bio, nbytes);
+}

-#define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
-	(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
-#define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
-	__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q)))
-#define BIO_SEG_BOUNDARY(q, b1, b2) \
-	BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2)))
+#define __bio_for_each_segment(bvl, bio, iter, start)			\
+	for (iter = (start);						\
+	     (iter).bi_size &&						\
+		((bvl = bio_iter_iovec((bio), (iter))), 1);		\
+	     bio_advance_iter_single((bio), &(iter), (bvl).bv_len))

-#define bio_io_error(bio) bio_endio((bio), -EIO)
+#define bio_for_each_segment(bvl, bio, iter)				\
+	__bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter)

-/*
- * drivers should not use the __ version unless they _really_ know what
- * they're doing
- */
-#define __bio_for_each_segment(bvl, bio, i, start_idx)			\
-	for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx);	\
-	     i < (bio)->bi_vcnt;					\
-	     bvl++, i++)
+#define __bio_for_each_bvec(bvl, bio, iter, start)			\
+	for (iter = (start);						\
+	     (iter).bi_size &&						\
+		((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \
+	     bio_advance_iter_single((bio), &(iter), (bvl).bv_len))
+
+/* iterate over multi-page bvec */
+#define bio_for_each_bvec(bvl, bio, iter)				\
+	__bio_for_each_bvec(bvl, bio, iter, (bio)->bi_iter)

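Editor's aside: a minimal sketch of walking a bio's payload with the bvec_iter based `bio_for_each_segment()` defined above. The CRC use and helper name are illustrative assumptions, not taken from this patch.

```c
/* Illustrative checksum over a bio's data. */
#include <linux/bio.h>
#include <linux/highmem.h>
#include <linux/crc32.h>

static u32 my_bio_crc(struct bio *bio)
{
	struct bvec_iter iter;
	struct bio_vec bv;
	u32 crc = 0;

	/* Single-page segments, starting from the bio's current bi_iter. */
	bio_for_each_segment(bv, bio, iter) {
		void *addr = bvec_kmap_local(&bv);

		crc = crc32(crc, addr, bv.bv_len);
		kunmap_local(addr);
	}
	return crc;
}
```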
 /*
- * drivers should _never_ use the all version - the bio may have been split
- * before it got to the driver and the driver won't own all of it
+ * Iterate over all multi-page bvecs. Drivers shouldn't use this version for the
+ * same reasons as bio_for_each_segment_all().
  */
-#define bio_for_each_segment_all(bvl, bio, i)				\
-	for (i = 0;							\
-	     bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt;	\
-	     i++)
+#define bio_for_each_bvec_all(bvl, bio, i)		\
+	for (i = 0, bvl = bio_first_bvec_all(bio);	\
+	     i < (bio)->bi_vcnt; i++, bvl++)
+
+#define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)

-#define bio_for_each_segment(bvl, bio, i)				\
-	for (i = (bio)->bi_idx;						\
-	     bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt;	\
-	     i++)
+static inline unsigned bio_segments(struct bio *bio)
+{
+	unsigned segs = 0;
+	struct bio_vec bv;
+	struct bvec_iter iter;
+
+	/*
+	 * We special case discard/write same/write zeroes, because they
+	 * interpret bi_size differently:
+	 */
+
+	switch (bio_op(bio)) {
+	case REQ_OP_DISCARD:
+	case REQ_OP_SECURE_ERASE:
+	case REQ_OP_WRITE_ZEROES:
+		return 0;
+	default:
+		break;
+	}
+
+	bio_for_each_segment(bv, bio, iter)
+		segs++;
+
+	return segs;
+}

 /*
  * get a reference to a bio, so it won't disappear. the intended use is
@@ -168,233 +209,300 @@ static inline void *bio_data(struct bio *bio)
  * returns. and then bio would be freed memory when if (bio->bi_flags ...)
  * runs
  */
-#define bio_get(bio)	atomic_inc(&(bio)->bi_cnt)
-
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-/*
- * bio integrity payload
- */
-struct bio_integrity_payload {
-	struct bio		*bip_bio;	/* parent bio */
-
-	sector_t		bip_sector;	/* virtual start sector */
-
-	void			*bip_buf;	/* generated integrity data */
-	bio_end_io_t		*bip_end_io;	/* saved I/O completion fn */
-
-	unsigned int		bip_size;
-
-	unsigned short		bip_slab;	/* slab the bip came from */
-	unsigned short		bip_vcnt;	/* # of integrity bio_vecs */
-	unsigned short		bip_idx;	/* current bip_vec index */
-	unsigned		bip_owns_buf:1;	/* should free bip_buf */
-
-	struct work_struct	bip_work;	/* I/O completion */
-
-	struct bio_vec		*bip_vec;
-	struct bio_vec		bip_inline_vecs[0];/* embedded bvec array */
-};
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
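Editor's aside: the "get a reference to a bio" comment above still shows the old two-argument `submit_bio(rw, bio)`. A hedged sketch of the same pattern against the new one-argument `submit_bio()`; `my_submit_and_peek` is hypothetical.

```c
/* Illustrative only: modern form of the pattern in the comment above. */
#include <linux/bio.h>
#include <linux/printk.h>

static void my_submit_and_peek(struct bio *bio)
{
	bio_get(bio);			/* keep @bio valid across completion */
	submit_bio(bio);

	/* Safe to look at the bio here even if it already completed. */
	if (bio_flagged(bio, BIO_QUIET))
		pr_debug("bio completed quietly\n");

	bio_put(bio);			/* drop our extra reference */
}
```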
-/*
- * A bio_pair is used when we need to split a bio.
- * This can only happen for a bio that refers to just one
- * page of data, and in the unusual situation when the
- * page crosses a chunk/device boundary
- *
- * The address of the master bio is stored in bio1.bi_private
- * The address of the pool the pair was allocated from is stored
- * in bio2.bi_private
- */
-struct bio_pair {
-	struct bio			bio1, bio2;
-	struct bio_vec			bv1, bv2;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-	struct bio_integrity_payload	bip1, bip2;
-	struct bio_vec			iv1, iv2;
-#endif
-	atomic_t			cnt;
-	int				error;
-};
-extern struct bio_pair *bio_split(struct bio *bi, int first_sectors);
-extern void bio_pair_release(struct bio_pair *dbio);
-
-extern struct bio_set *bioset_create(unsigned int, unsigned int);
-extern void bioset_free(struct bio_set *);
-extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries);
+static inline void bio_get(struct bio *bio)
+{
+	bio->bi_flags |= (1 << BIO_REFFED);
+	smp_mb__before_atomic();
+	atomic_inc(&bio->__bi_cnt);
+}

-extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
-extern void bio_put(struct bio *);
+static inline void bio_cnt_set(struct bio *bio, unsigned int count)
+{
+	if (count != 1) {
+		bio->bi_flags |= (1 << BIO_REFFED);
+		smp_mb();
+	}
+	atomic_set(&bio->__bi_cnt, count);
+}

-extern void __bio_clone(struct bio *, struct bio *);
-extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
+static inline bool bio_flagged(struct bio *bio, unsigned int bit)
+{
+	return bio->bi_flags & (1U << bit);
+}

-extern struct bio_set *fs_bio_set;
+static inline void bio_set_flag(struct bio *bio, unsigned int bit)
+{
+	bio->bi_flags |= (1U << bit);
+}

-static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+static inline void bio_clear_flag(struct bio *bio, unsigned int bit)
 {
-	return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
+	bio->bi_flags &= ~(1U << bit);
 }

-static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
+static inline struct bio_vec *bio_first_bvec_all(struct bio *bio)
 {
-	return bio_clone_bioset(bio, gfp_mask, fs_bio_set);
+	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+	return bio->bi_io_vec;
 }

-static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+static inline struct page *bio_first_page_all(struct bio *bio)
 {
-	return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
+	return bio_first_bvec_all(bio)->bv_page;
 }

-static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
+static inline struct folio *bio_first_folio_all(struct bio *bio)
 {
-	return bio_clone_bioset(bio, gfp_mask, NULL);
+	return page_folio(bio_first_page_all(bio));
+}
+
+static inline struct bio_vec *bio_last_bvec_all(struct bio *bio)
+{
+	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+	return &bio->bi_io_vec[bio->bi_vcnt - 1];
 }
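Editor's aside: a hedged sketch using the `bio_first_bvec_all()`/`bio_last_bvec_all()` accessors added above; they are only valid on a bio the caller assembled itself (both WARN on `BIO_CLONED`). The alignment check is purely illustrative.

```c
/* Illustrative check; only valid on a bio the caller assembled itself. */
#include <linux/bio.h>

static bool my_bio_is_page_aligned(struct bio *bio)
{
	struct bio_vec *first = bio_first_bvec_all(bio);
	struct bio_vec *last = bio_last_bvec_all(bio);

	return first->bv_offset == 0 &&
	       (last->bv_offset + last->bv_len) % PAGE_SIZE == 0;
}
```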

-extern void bio_endio(struct bio *, int);
-struct request_queue;
-extern int bio_phys_segments(struct request_queue *, struct bio *);
-
-extern int submit_bio_wait(int rw, struct bio *bio);
-extern void bio_advance(struct bio *, unsigned);
-
-extern void bio_init(struct bio *);
-extern void bio_reset(struct bio *);
-
-extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
-extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
-			   unsigned int, unsigned int);
-extern int bio_get_nr_vecs(struct block_device *);
-extern sector_t bio_sector_offset(struct bio *, unsigned short, unsigned int);
-extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
-				unsigned long, unsigned int, int, gfp_t);
-struct sg_iovec;
-struct rq_map_data;
-extern struct bio *bio_map_user_iov(struct request_queue *,
-				    struct block_device *,
-				    struct sg_iovec *, int, int, gfp_t);
-extern void bio_unmap_user(struct bio *);
-extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
-				gfp_t);
-extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
-				 gfp_t, int);
-extern void bio_set_pages_dirty(struct bio *bio);
-extern void bio_check_pages_dirty(struct bio *bio);
+/**
+ * struct folio_iter - State for iterating all folios in a bio.
+ * @folio: The current folio we're iterating. NULL after the last folio.
+ * @offset: The byte offset within the current folio.
+ * @length: The number of bytes in this iteration (will not cross folio
+ *	    boundary).
+ */
+struct folio_iter {
+	struct folio *folio;
+	size_t offset;
+	size_t length;
+	/* private: for use by the iterator */
+	struct folio *_next;
+	size_t _seg_count;
+	int _i;
+};

-#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
-# error	"You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
-#endif
-#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
-extern void bio_flush_dcache_pages(struct bio *bi);
-#else
-static inline void bio_flush_dcache_pages(struct bio *bi)
+static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio,
+				   int i)
 {
-}
-#endif
+	struct bio_vec *bvec = bio_first_bvec_all(bio) + i;

-extern void bio_copy_data(struct bio *dst, struct bio *src);
-extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
-
-extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
-				 unsigned long, unsigned int, int, gfp_t);
-extern struct bio *bio_copy_user_iov(struct request_queue *,
-				     struct rq_map_data *, struct sg_iovec *,
-				     int, int, gfp_t);
-extern int bio_uncopy_user(struct bio *);
-void zero_fill_bio(struct bio *bio);
-extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
-extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int);
-extern unsigned int bvec_nr_vecs(unsigned short idx);
+	if (unlikely(i >= bio->bi_vcnt)) {
+		fi->folio = NULL;
+		return;
+	}

-#ifdef CONFIG_BLK_CGROUP
-int bio_associate_current(struct bio *bio);
-void bio_disassociate_task(struct bio *bio);
-#else	/* CONFIG_BLK_CGROUP */
-static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
-static inline void bio_disassociate_task(struct bio *bio) { }
-#endif	/* CONFIG_BLK_CGROUP */
+	fi->folio = page_folio(bvec->bv_page);
+	fi->offset = bvec->bv_offset +
+			PAGE_SIZE * folio_page_idx(fi->folio, bvec->bv_page);
+	fi->_seg_count = bvec->bv_len;
+	fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count);
+	fi->_next = folio_next(fi->folio);
+	fi->_i = i;
+}
+
+static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio)
+{
+	fi->_seg_count -= fi->length;
+	if (fi->_seg_count) {
+		fi->folio = fi->_next;
+		fi->offset = 0;
+		fi->length = min(folio_size(fi->folio), fi->_seg_count);
+		fi->_next = folio_next(fi->folio);
+	} else {
+		bio_first_folio(fi, bio, fi->_i + 1);
+	}
+}

-#ifdef CONFIG_HIGHMEM
-/*
- * remember never ever reenable interrupts between a bvec_kmap_irq and
- * bvec_kunmap_irq!
+/**
+ * bio_for_each_folio_all - Iterate over each folio in a bio.
+ * @fi: struct folio_iter which is updated for each folio.
+ * @bio: struct bio to iterate over.
+ */
+#define bio_for_each_folio_all(fi, bio)				\
+	for (bio_first_folio(&fi, bio, 0); fi.folio; bio_next_folio(&fi, bio))
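Editor's aside: a sketch of the new folio iterator in a submitter's completion handler. It is a simplification that assumes each folio was added to the bio exactly once and fully; `my_readahead_end_io` is a hypothetical name.

```c
/* Illustrative folio-based completion; simplified uptodate handling. */
#include <linux/bio.h>
#include <linux/pagemap.h>

static void my_readahead_end_io(struct bio *bio)
{
	struct folio_iter fi;

	bio_for_each_folio_all(fi, bio) {
		/* fi.folio, fi.offset and fi.length describe this chunk. */
		if (!bio->bi_status)
			folio_mark_uptodate(fi.folio);
		folio_unlock(fi.folio);
	}
	bio_put(bio);
}
```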

+void bio_trim(struct bio *bio, sector_t offset, sector_t size);
+extern struct bio *bio_split(struct bio *bio, int sectors,
+			     gfp_t gfp, struct bio_set *bs);
+int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
+		unsigned *segs, unsigned max_bytes, unsigned len_align);
+u8 bio_seg_gap(struct request_queue *q, struct bio *prev, struct bio *next,
+		u8 gaps_bit);
+
+/**
+ * bio_next_split - get next @sectors from a bio, splitting if necessary
+ * @bio:	bio to split
+ * @sectors:	number of sectors to split from the front of @bio
+ * @gfp:	gfp mask
+ * @bs:		bio set to allocate from
+ *
+ * Return: a bio representing the next @sectors of @bio - if the bio is smaller
+ * than @sectors, returns the original bio unchanged.
  */
-static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
+static inline struct bio *bio_next_split(struct bio *bio, int sectors,
+					 gfp_t gfp, struct bio_set *bs)
 {
-	unsigned long addr;
+	if (sectors >= bio_sectors(bio))
+		return bio;

-	/*
-	 * might not be a highmem page, but the preempt/irq count
-	 * balancing is a lot nicer this way
-	 */
-	local_irq_save(*flags);
-	addr = (unsigned long) kmap_atomic(bvec->bv_page);
+	return bio_split(bio, sectors, gfp, bs);
+}

-	BUG_ON(addr & ~PAGE_MASK);
+enum {
+	BIOSET_NEED_BVECS = BIT(0),
+	BIOSET_NEED_RESCUER = BIT(1),
+	BIOSET_PERCPU_CACHE = BIT(2),
+};
+extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags);
+extern void bioset_exit(struct bio_set *);
+extern int biovec_init_pool(mempool_t *pool, int pool_entries);
+
+struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
+			     blk_opf_t opf, gfp_t gfp_mask,
+			     struct bio_set *bs);
+struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask);
+extern void bio_put(struct bio *);

-	return (char *) addr + bvec->bv_offset;
-}
+struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src,
+		gfp_t gfp, struct bio_set *bs);
+int bio_init_clone(struct block_device *bdev, struct bio *bio,
+		struct bio *bio_src, gfp_t gfp);

-static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
-{
-	unsigned long ptr = (unsigned long) buffer & PAGE_MASK;
+extern struct bio_set fs_bio_set;

-	kunmap_atomic((void *) ptr);
-	local_irq_restore(*flags);
+static inline struct bio *bio_alloc(struct block_device *bdev,
+		unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp_mask)
+{
+	return bio_alloc_bioset(bdev, nr_vecs, opf, gfp_mask, &fs_bio_set);
 }
-#else
-static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
+void submit_bio(struct bio *bio);
+
+extern void bio_endio(struct bio *);
+
+static inline void bio_io_error(struct bio *bio)
 {
-	return page_address(bvec->bv_page) + bvec->bv_offset;
+	bio->bi_status = BLK_STS_IOERR;
+	bio_endio(bio);
 }

-static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
+static inline void bio_wouldblock_error(struct bio *bio)
 {
-	*flags = 0;
+	bio_set_flag(bio, BIO_QUIET);
+	bio->bi_status = BLK_STS_AGAIN;
+	bio_endio(bio);
 }
-#endif

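Editor's aside: a hedged sketch of the bio_set API declared above, a driver-private pool initialized once and used per I/O. Pool size, front padding and the `my_*` names are illustrative assumptions.

```c
/* Illustrative driver-private bio_set; sizes and flags are assumptions. */
#include <linux/bio.h>

static struct bio_set my_bio_set;

static int my_driver_init(void)
{
	/* pool of BIO_POOL_SIZE bios, no front padding, with a bvec pool */
	return bioset_init(&my_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
}

static struct bio *my_alloc_read_bio(struct block_device *bdev,
				     unsigned short nr_vecs)
{
	return bio_alloc_bioset(bdev, bio_max_segs(nr_vecs), REQ_OP_READ,
				GFP_NOIO, &my_bio_set);
}

static void my_driver_exit(void)
{
	bioset_exit(&my_bio_set);
}
```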
-static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
-				   unsigned long *flags)
+/*
+ * Calculate number of bvec segments that should be allocated to fit data
+ * pointed by @iter. If @iter is backed by bvec it's going to be reused
+ * instead of allocating a new one.
+ */
+static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
 {
-	return bvec_kmap_irq(bio_iovec_idx(bio, idx), flags);
+	if (iov_iter_is_bvec(iter))
+		return 0;
+	return iov_iter_npages(iter, max_segs);
 }

-#define __bio_kunmap_irq(buf, flags)	bvec_kunmap_irq(buf, flags)
-#define bio_kmap_irq(bio, flags) \
-	__bio_kmap_irq((bio), (bio)->bi_idx, (flags))
-#define bio_kunmap_irq(buf,flags)	__bio_kunmap_irq(buf, flags)
+struct request_queue;

-/*
- * Check whether this bio carries any data or not. A NULL bio is allowed.
+void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
+	      unsigned short max_vecs, blk_opf_t opf);
+static inline void bio_init_inline(struct bio *bio, struct block_device *bdev,
+		unsigned short max_vecs, blk_opf_t opf)
+{
+	bio_init(bio, bdev, bio_inline_vecs(bio), max_vecs, opf);
+}
+extern void bio_uninit(struct bio *);
+void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf);
+void bio_chain(struct bio *, struct bio *);
+
+int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len,
+			      unsigned off);
+bool __must_check bio_add_folio(struct bio *bio, struct folio *folio,
+				size_t len, size_t off);
+void __bio_add_page(struct bio *bio, struct page *page,
+		unsigned int len, unsigned int off);
+void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
+			  size_t off);
+void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len);
+
+/**
+ * bio_add_max_vecs - number of bio_vecs needed to add data to a bio
+ * @kaddr: kernel virtual address to add
+ * @len: length in bytes to add
+ *
+ * Calculate how many bio_vecs need to be allocated to add the kernel virtual
+ * address range in [@kaddr:@len] in the worse case.
  */
-static inline bool bio_has_data(struct bio *bio)
+static inline unsigned int bio_add_max_vecs(void *kaddr, unsigned int len)
 {
-	if (bio && bio->bi_vcnt)
-		return true;
+	if (is_vmalloc_addr(kaddr))
+		return DIV_ROUND_UP(offset_in_page(kaddr) + len, PAGE_SIZE);
+	return 1;
+}

-	return false;
+unsigned int bio_add_vmalloc_chunk(struct bio *bio, void *vaddr, unsigned len);
+bool bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned int len);
+
+int submit_bio_wait(struct bio *bio);
+int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data,
+		size_t len, enum req_op op);
+
+int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter,
+			   unsigned len_align_mask);
+
+void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter);
+void __bio_release_pages(struct bio *bio, bool mark_dirty);
+extern void bio_set_pages_dirty(struct bio *bio);
+extern void bio_check_pages_dirty(struct bio *bio);
+
+extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+			       struct bio *src, struct bvec_iter *src_iter);
+extern void bio_copy_data(struct bio *dst, struct bio *src);
+extern void bio_free_pages(struct bio *bio);
+void guard_bio_eod(struct bio *bio);
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter);
+
+static inline void zero_fill_bio(struct bio *bio)
+{
+	zero_fill_bio_iter(bio, bio->bi_iter);
 }

-static inline bool bio_is_rw(struct bio *bio)
+static inline void bio_release_pages(struct bio *bio, bool mark_dirty)
 {
-	if (!bio_has_data(bio))
-		return false;
+	if (bio_flagged(bio, BIO_PAGE_PINNED))
+		__bio_release_pages(bio, mark_dirty);
+}

-	if (bio->bi_rw & REQ_WRITE_SAME)
-		return false;
+#define bio_dev(bio) \
+	disk_devt((bio)->bi_bdev->bd_disk)

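Editor's aside: a minimal sketch combining `bio_init()`, `bio_add_page()` and `submit_bio_wait()` from the declarations above into a synchronous, on-stack, single-page read; no splitting or retry logic, names are illustrative.

```c
/* Illustrative synchronous single-page read using an on-stack bio. */
#include <linux/bio.h>

static int my_sync_read_page(struct block_device *bdev, sector_t sector,
			     struct page *page)
{
	struct bio_vec bvec;
	struct bio bio;
	int ret;

	bio_init(&bio, bdev, &bvec, 1, REQ_OP_READ);
	bio.bi_iter.bi_sector = sector;
	if (bio_add_page(&bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
		return -EIO;

	ret = submit_bio_wait(&bio);
	bio_uninit(&bio);
	return ret;
}
```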
-	return true;
+#ifdef CONFIG_BLK_CGROUP
+void bio_associate_blkg(struct bio *bio);
+void bio_associate_blkg_from_css(struct bio *bio,
+				 struct cgroup_subsys_state *css);
+void bio_clone_blkg_association(struct bio *dst, struct bio *src);
+void blkcg_punt_bio_submit(struct bio *bio);
+#else	/* CONFIG_BLK_CGROUP */
+static inline void bio_associate_blkg(struct bio *bio) { }
+static inline void bio_associate_blkg_from_css(struct bio *bio,
+						struct cgroup_subsys_state *css)
+{ }
+static inline void bio_clone_blkg_association(struct bio *dst,
+					      struct bio *src) { }
+static inline void blkcg_punt_bio_submit(struct bio *bio)
+{
+	submit_bio(bio);
 }
+#endif	/* CONFIG_BLK_CGROUP */

-static inline bool bio_mergeable(struct bio *bio)
+static inline void bio_set_dev(struct bio *bio, struct block_device *bdev)
 {
-	if (bio->bi_rw & REQ_NOMERGE_FLAGS)
-		return false;
-
-	return true;
+	bio_clear_flag(bio, BIO_REMAPPED);
+	if (bio->bi_bdev != bdev)
+		bio_clear_flag(bio, BIO_BPS_THROTTLED);
+	bio->bi_bdev = bdev;
+	bio_associate_blkg(bio);
 }

 /*
@@ -419,6 +527,8 @@ static inline void bio_list_init(struct bio_list *bl)
 	bl->head = bl->tail = NULL;
 }

+#define BIO_EMPTY_LIST	{ NULL, NULL }
+
 #define bio_list_for_each(bio, bl) \
 	for (bio = (bl)->head; bio; bio = bio->bi_next)

@@ -468,6 +578,13 @@ static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
 	bl->tail = bl2->tail;
 }

+static inline void bio_list_merge_init(struct bio_list *bl,
+		struct bio_list *bl2)
+{
+	bio_list_merge(bl, bl2);
+	bio_list_init(bl2);
+}
+
 static inline void bio_list_merge_head(struct bio_list *bl,
 				       struct bio_list *bl2)
 {
@@ -512,26 +629,37 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
 }

 /*
+ * Increment chain count for the bio. Make sure the CHAIN flag update
+ * is visible before the raised count.
+ */
+static inline void bio_inc_remaining(struct bio *bio)
+{
+	bio_set_flag(bio, BIO_CHAIN);
+	smp_mb__before_atomic();
+	atomic_inc(&bio->__bi_remaining);
+}
+
+/*
  * bio_set is used to allow other portions of the IO system to
  * allocate their own private memory pools for bio and iovec structures.
  * These memory pools in turn all allocate from the bio_slab
  * and the bvec_slabs[].
  */
 #define BIO_POOL_SIZE 2
-#define BIOVEC_NR_POOLS 6
-#define BIOVEC_MAX_IDX	(BIOVEC_NR_POOLS - 1)

 struct bio_set {
 	struct kmem_cache *bio_slab;
 	unsigned int front_pad;

-	mempool_t *bio_pool;
-	mempool_t *bvec_pool;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-	mempool_t *bio_integrity_pool;
-	mempool_t *bvec_integrity_pool;
-#endif
+	/*
+	 * per-cpu bio alloc cache
+	 */
+	struct bio_alloc_cache __percpu *cache;
+
+	mempool_t bio_pool;
+	mempool_t bvec_pool;
+
+	unsigned int back_pad;

 	/*
 	 * Deadlock avoidance for stacking block drivers: see comments in
 	 * bio_alloc_bioset() for details
@@ -540,117 +668,59 @@ struct bio_set {
 	struct bio_list		rescue_list;
 	struct work_struct	rescue_work;
 	struct workqueue_struct	*rescue_workqueue;
-};
-
-struct biovec_slab {
-	int nr_vecs;
-	char *name;
-	struct kmem_cache *slab;
+	/*
+	 * Hot un-plug notifier for the per-cpu cache, if used
+	 */
+	struct hlist_node cpuhp_dead;
 };

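Editor's aside: a hedged sketch of the bio_list helpers touched above (`BIO_EMPTY_LIST` and `bio_list_merge_init()` are new in this diff; `bio_list_pop()` lives elsewhere in this header), draining a deferred-submission list. The surrounding driver context is assumed.

```c
/* Illustrative deferred-submission queue built on struct bio_list. */
#include <linux/bio.h>

static void my_flush_deferred(struct bio_list *deferred)
{
	struct bio_list local = BIO_EMPTY_LIST;
	struct bio *bio;

	/* Steal everything queued so far; @deferred stays initialized. */
	bio_list_merge_init(&local, deferred);

	while ((bio = bio_list_pop(&local)))
		submit_bio(bio);
}
```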
-/*
- * a small number of entries is fine, not going to be performance critical.
- * basically we just need to survive
- */
-#define BIO_SPLIT_ENTRIES 2
-
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-
-#define bip_vec_idx(bip, idx)	(&(bip->bip_vec[(idx)]))
-#define bip_vec(bip)		bip_vec_idx(bip, 0)
-
-#define __bip_for_each_vec(bvl, bip, i, start_idx)			\
-	for (bvl = bip_vec_idx((bip), (start_idx)), i = (start_idx);	\
-	     i < (bip)->bip_vcnt;					\
-	     bvl++, i++)
-
-#define bip_for_each_vec(bvl, bip, i)					\
-	__bip_for_each_vec(bvl, bip, i, (bip)->bip_idx)
-
-#define bio_for_each_integrity_vec(_bvl, _bio, _iter)			\
-	for_each_bio(_bio)						\
-		bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
-
-#define bio_integrity(bio) (bio->bi_integrity != NULL)
-
-extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
-extern void bio_integrity_free(struct bio *);
-extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
-extern int bio_integrity_enabled(struct bio *bio);
-extern int bio_integrity_set_tag(struct bio *, void *, unsigned int);
-extern int bio_integrity_get_tag(struct bio *, void *, unsigned int);
-extern int bio_integrity_prep(struct bio *);
-extern void bio_integrity_endio(struct bio *, int);
-extern void bio_integrity_advance(struct bio *, unsigned int);
-extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
-extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
-extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
-extern int bioset_integrity_create(struct bio_set *, int);
-extern void bioset_integrity_free(struct bio_set *);
-extern void bio_integrity_init(void);
-
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-
-static inline int bio_integrity(struct bio *bio)
+static inline bool bioset_initialized(struct bio_set *bs)
 {
-	return 0;
+	return bs->bio_slab != NULL;
 }

-static inline int bio_integrity_enabled(struct bio *bio)
-{
-	return 0;
-}
-
-static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
-{
-	return 0;
-}
-
-static inline void bioset_integrity_free (struct bio_set *bs)
-{
-	return;
-}
-
-static inline int bio_integrity_prep(struct bio *bio)
-{
-	return 0;
-}
-
-static inline void bio_integrity_free(struct bio *bio)
-{
-	return;
-}
-
-static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
-				      gfp_t gfp_mask)
-{
-	return 0;
-}
-
-static inline void bio_integrity_split(struct bio *bio, struct bio_pair *bp,
-				       int sectors)
+/*
+ * Mark a bio as polled. Note that for async polled IO, the caller must
+ * expect -EWOULDBLOCK if we cannot allocate a request (or other resources).
+ * We cannot block waiting for requests on polled IO, as those completions
+ * must be found by the caller. This is different than IRQ driven IO, where
+ * it's safe to wait for IO to complete.
+ */
+static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
 {
-	return;
+	bio->bi_opf |= REQ_POLLED;
+	if (kiocb->ki_flags & IOCB_NOWAIT)
+		bio->bi_opf |= REQ_NOWAIT;
 }

-static inline void bio_integrity_advance(struct bio *bio,
-					 unsigned int bytes_done)
+static inline void bio_clear_polled(struct bio *bio)
 {
-	return;
+	bio->bi_opf &= ~REQ_POLLED;
 }

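Editor's aside: a sketch of `bio_set_polled()` in a direct-I/O style submission path, following the comment above; the `IOCB_HIPRI` gating and `my_dio_submit` name are assumptions, not from this patch.

```c
/* Illustrative polled submission; the kiocb comes from the caller. */
#include <linux/bio.h>
#include <linux/fs.h>

static void my_dio_submit(struct kiocb *iocb, struct bio *bio)
{
	if (iocb->ki_flags & IOCB_HIPRI)
		bio_set_polled(bio, iocb);	/* REQ_POLLED, plus REQ_NOWAIT if set */

	submit_bio(bio);
}
```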
-static inline void bio_integrity_trim(struct bio *bio, unsigned int offset,
-				      unsigned int sectors)
+/**
+ * bio_is_zone_append - is this a zone append bio?
+ * @bio:	bio to check
+ *
+ * Check if @bio is a zone append operation. Core block layer code and end_io
+ * handlers must use this instead of an open coded REQ_OP_ZONE_APPEND check
+ * because the block layer can rewrite REQ_OP_ZONE_APPEND to REQ_OP_WRITE if
+ * it is not natively supported.
+ */
+static inline bool bio_is_zone_append(struct bio *bio)
 {
-	return;
+	if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
+		return false;
+	return bio_op(bio) == REQ_OP_ZONE_APPEND ||
+		bio_flagged(bio, BIO_EMULATES_ZONE_APPEND);
 }

-static inline void bio_integrity_init(void)
-{
-	return;
-}
+struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
+		unsigned int nr_pages, blk_opf_t opf, gfp_t gfp);
+struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new);

-#endif /* CONFIG_BLK_DEV_INTEGRITY */
+struct bio *blk_alloc_discard_bio(struct block_device *bdev,
+		sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask);

-#endif /* CONFIG_BLOCK */

 #endif /* __LINUX_BIO_H */
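Editor's aside: a hedged sketch of the `bio_is_zone_append()` check in a completion handler. Reading the actual write location from the completed bio's `bi_iter.bi_sector` follows the zoned block device conventions; the `my_*` names are hypothetical.

```c
/* Illustrative completion handler for a (possibly emulated) zone append. */
#include <linux/bio.h>

static void my_record_append_location(struct bio *bio, sector_t sector);

static void my_zone_end_io(struct bio *bio)
{
	if (!bio->bi_status && bio_is_zone_append(bio)) {
		/* On completion, bi_sector holds where the data was written. */
		my_record_append_location(bio, bio->bi_iter.bi_sector);
	}
	bio_put(bio);
}
```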
