Diffstat (limited to 'block/blk.h')
-rw-r--r-- | block/blk.h | 294
1 file changed, 221 insertions, 73 deletions
diff --git a/block/blk.h b/block/blk.h
index d9f584984bc4..37ec459fe656 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -2,7 +2,9 @@
 #ifndef BLK_INTERNAL_H
 #define BLK_INTERNAL_H
 
+#include <linux/bio-integrity.h>
 #include <linux/blk-crypto.h>
+#include <linux/lockdep.h>
 #include <linux/memblock.h>	/* for max_pfn/max_low_pfn */
 #include <linux/sched/sysctl.h>
 #include <linux/timekeeping.h>
@@ -11,6 +13,9 @@
 
 struct elevator_type;
 
+#define BLK_DEV_MAX_SECTORS	(LLONG_MAX >> 9)
+#define BLK_MIN_SEGMENT_SIZE	4096
+
 /* Max future timer expiry for timeouts */
 #define BLK_MAX_TIMEOUT		(5 * HZ)
 
@@ -33,11 +38,13 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
 					      gfp_t flags);
 void blk_free_flush_queue(struct blk_flush_queue *q);
 
-void blk_freeze_queue(struct request_queue *q);
-void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
-void blk_queue_start_drain(struct request_queue *q);
+bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
+bool blk_queue_start_drain(struct request_queue *q);
+bool __blk_freeze_queue_start(struct request_queue *q,
+			      struct task_struct *owner);
 int __bio_queue_enter(struct request_queue *q, struct bio *bio);
 void submit_bio_noacct_nocheck(struct bio *bio);
+void bio_await_chain(struct bio *bio);
 
 static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
 {
@@ -67,8 +74,11 @@ static inline int bio_queue_enter(struct bio *bio)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-	if (blk_try_enter_queue(q, false))
+	if (blk_try_enter_queue(q, false)) {
+		rwsem_acquire_read(&q->io_lockdep_map, 0, 0, _RET_IP_);
+		rwsem_release(&q->io_lockdep_map, _RET_IP_);
 		return 0;
+	}
 
 	return __bio_queue_enter(q, bio);
 }
@@ -84,21 +94,23 @@ static inline void blk_wait_io(struct completion *done)
 		wait_for_completion_io(done);
 }
 
+struct block_device *blkdev_get_no_open(dev_t dev, bool autoload);
+void blkdev_put_no_open(struct block_device *bdev);
+
 #define BIO_INLINE_VECS 4
 struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
 		gfp_t gfp_mask);
 void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs);
 
 bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
-		struct page *page, unsigned len, unsigned offset,
-		bool *same_page);
+		struct page *page, unsigned len, unsigned offset);
 
 static inline bool biovec_phys_mergeable(struct request_queue *q,
 		struct bio_vec *vec1, struct bio_vec *vec2)
 {
 	unsigned long mask = queue_segment_boundary(q);
-	phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset;
-	phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset;
+	phys_addr_t addr1 = bvec_phys(vec1);
+	phys_addr_t addr2 = bvec_phys(vec2);
 
 	/*
 	 * Merging adjacent physical pages may not work correctly under KMSAN
@@ -180,9 +192,11 @@ static inline unsigned int blk_rq_get_max_segments(struct request *rq)
 	return queue_max_segments(rq->q);
 }
 
-static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
-						     enum req_op op)
+static inline unsigned int blk_queue_get_max_sectors(struct request *rq)
 {
+	struct request_queue *q = rq->q;
+	enum req_op op = req_op(rq);
+
 	if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
 		return min(q->limits.max_discard_sectors,
 			   UINT_MAX >> SECTOR_SHIFT);
@@ -190,16 +204,28 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
 	if (unlikely(op == REQ_OP_WRITE_ZEROES))
 		return q->limits.max_write_zeroes_sectors;
 
+	if (rq->cmd_flags & REQ_ATOMIC)
+		return q->limits.atomic_write_max_sectors;
+
 	return q->limits.max_sectors;
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 void blk_flush_integrity(void);
-bool __bio_integrity_endio(struct bio *);
 void bio_integrity_free(struct bio *bio);
+
+/*
+ * Integrity payloads can either be owned by the submitter, in which case
+ * bio_uninit will free them, or owned and generated by the block layer,
+ * in which case we'll verify them here (for reads) and free them before
+ * the bio is handed back to the submitted.
+ */
+bool __bio_integrity_endio(struct bio *bio);
 static inline bool bio_integrity_endio(struct bio *bio)
 {
-	if (bio_integrity(bio))
+	struct bio_integrity_payload *bip = bio_integrity(bio);
+
+	if (bip && (bip->bip_flags & BIP_BLOCK_INTEGRITY))
 		return __bio_integrity_endio(bio);
 	return true;
 }
@@ -269,6 +295,14 @@ static inline void bio_integrity_free(struct bio *bio)
 unsigned long blk_rq_timeout(unsigned long timeout);
 void blk_add_timer(struct request *req);
 
+enum bio_merge_status {
+	BIO_MERGE_OK,
+	BIO_MERGE_NONE,
+	BIO_MERGE_FAILED,
+};
+
+enum bio_merge_status bio_attempt_back_merge(struct request *req,
+		struct bio *bio, unsigned int nr_segs);
 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs);
 bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
@@ -287,11 +321,9 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
 
 bool blk_insert_flush(struct request *rq);
 
-int elevator_switch(struct request_queue *q, struct elevator_type *new_e);
-void elevator_disable(struct request_queue *q);
-void elevator_exit(struct request_queue *q);
-int elv_register_queue(struct request_queue *q, bool uevent);
-void elv_unregister_queue(struct request_queue *q);
+void elv_update_nr_hw_queues(struct request_queue *q);
+void elevator_set_default(struct request_queue *q);
+void elevator_set_none(struct request_queue *q);
 
 ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
 		char *buf);
@@ -307,33 +339,92 @@ ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
 ssize_t part_timeout_store(struct device *, struct device_attribute *,
 				const char *, size_t);
 
-static inline bool bio_may_exceed_limits(struct bio *bio,
-					 const struct queue_limits *lim)
+struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
+		unsigned *nsegs);
+struct bio *bio_split_write_zeroes(struct bio *bio,
+		const struct queue_limits *lim, unsigned *nsegs);
+struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
+		unsigned *nr_segs);
+struct bio *bio_split_zone_append(struct bio *bio,
+		const struct queue_limits *lim, unsigned *nr_segs);
+
+/*
+ * All drivers must accept single-segments bios that are smaller than PAGE_SIZE.
+ *
+ * This is a quick and dirty check that relies on the fact that bi_io_vec[0] is
+ * always valid if a bio has data. The check might lead to occasional false
+ * positives when bios are cloned, but compared to the performance impact of
+ * cloned bios themselves the loop below doesn't matter anyway.
+ */
+static inline bool bio_may_need_split(struct bio *bio,
+		const struct queue_limits *lim)
+{
+	if (lim->chunk_sectors)
+		return true;
+	if (bio->bi_vcnt != 1)
+		return true;
+	return bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset >
+		lim->min_segment_size;
+}
+
+/**
+ * __bio_split_to_limits - split a bio to fit the queue limits
+ * @bio: bio to be split
+ * @lim: queue limits to split based on
+ * @nr_segs: returns the number of segments in the returned bio
+ *
+ * Check if @bio needs splitting based on the queue limits, and if so split off
+ * a bio fitting the limits from the beginning of @bio and return it. @bio is
+ * shortened to the remainder and re-submitted.
+ *
+ * The split bio is allocated from @q->bio_split, which is provided by the
+ * block layer.
+ */
+static inline struct bio *__bio_split_to_limits(struct bio *bio,
+		const struct queue_limits *lim, unsigned int *nr_segs)
 {
 	switch (bio_op(bio)) {
+	case REQ_OP_READ:
+	case REQ_OP_WRITE:
+		if (bio_may_need_split(bio, lim))
+			return bio_split_rw(bio, lim, nr_segs);
+		*nr_segs = 1;
+		return bio;
+	case REQ_OP_ZONE_APPEND:
+		return bio_split_zone_append(bio, lim, nr_segs);
 	case REQ_OP_DISCARD:
 	case REQ_OP_SECURE_ERASE:
+		return bio_split_discard(bio, lim, nr_segs);
 	case REQ_OP_WRITE_ZEROES:
-		return true; /* non-trivial splitting decisions */
+		return bio_split_write_zeroes(bio, lim, nr_segs);
 	default:
-		break;
+		/* other operations can't be split */
+		*nr_segs = 0;
+		return bio;
 	}
+}
 
+/**
+ * get_max_segment_size() - maximum number of bytes to add as a single segment
+ * @lim: Request queue limits.
+ * @paddr: address of the range to add
+ * @len: maximum length available to add at @paddr
+ *
+ * Returns the maximum number of bytes of the range starting at @paddr that can
+ * be added to a single segment.
+ */
+static inline unsigned get_max_segment_size(const struct queue_limits *lim,
+		phys_addr_t paddr, unsigned int len)
+{
 	/*
-	 * All drivers must accept single-segments bios that are <= PAGE_SIZE.
-	 * This is a quick and dirty check that relies on the fact that
-	 * bi_io_vec[0] is always valid if a bio has data. The check might
-	 * lead to occasional false negatives when bios are cloned, but compared
-	 * to the performance impact of cloned bios themselves the loop below
-	 * doesn't matter anyway.
+	 * Prevent an overflow if mask = ULONG_MAX and offset = 0 by adding 1
+	 * after having calculated the minimum.
 	 */
-	return lim->chunk_sectors || bio->bi_vcnt != 1 ||
-		bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
+	return min_t(unsigned long, len,
+		min(lim->seg_boundary_mask - (lim->seg_boundary_mask & paddr),
+		    (unsigned long)lim->max_segment_size - 1) + 1);
 }
 
-struct bio *__bio_split_to_limits(struct bio *bio,
-		const struct queue_limits *lim,
-		unsigned int *nr_segs);
 int ll_back_merge_fn(struct request *req, struct bio *bio,
 		unsigned int nr_segs);
 bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
@@ -343,19 +434,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
 enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);
 
 int blk_set_default_limits(struct queue_limits *lim);
+void blk_apply_bdi_limits(struct backing_dev_info *bdi,
+		struct queue_limits *lim);
 int blk_dev_init(void);
 
-/*
- * Contribute to IO statistics IFF:
- *
- *	a) it's attached to a gendisk, and
- *	b) the queue had IO stats enabled when this request was started
- */
-static inline bool blk_do_io_stat(struct request *rq)
-{
-	return (rq->rq_flags & RQF_IO_STAT) && !blk_rq_is_passthrough(rq);
-}
-
 void update_io_ticks(struct block_device *part, unsigned long now, bool end);
 
 static inline void req_set_nomerge(struct request_queue *q, struct request *req)
@@ -378,42 +460,78 @@ static inline void ioc_clear_queue(struct request_queue *q)
 }
 #endif /* CONFIG_BLK_ICQ */
 
-#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
-extern ssize_t blk_throtl_sample_time_store(struct request_queue *q,
-	const char *page, size_t count);
-extern void blk_throtl_bio_endio(struct bio *bio);
-extern void blk_throtl_stat_add(struct request *rq, u64 time);
-#else
-static inline void blk_throtl_bio_endio(struct bio *bio) { }
-static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
-#endif
-
-struct bio *__blk_queue_bounce(struct bio *bio, struct request_queue *q);
-
-static inline bool blk_queue_may_bounce(struct request_queue *q)
+#ifdef CONFIG_BLK_DEV_ZONED
+void disk_init_zone_resources(struct gendisk *disk);
+void disk_free_zone_resources(struct gendisk *disk);
+static inline bool bio_zone_write_plugging(struct bio *bio)
 {
-	return IS_ENABLED(CONFIG_BOUNCE) &&
-		q->limits.bounce == BLK_BOUNCE_HIGH &&
-		max_low_pfn >= max_pfn;
+	return bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING);
 }
-
-static inline struct bio *blk_queue_bounce(struct bio *bio,
-		struct request_queue *q)
+void blk_zone_write_plug_bio_merged(struct bio *bio);
+void blk_zone_write_plug_init_request(struct request *rq);
+static inline void blk_zone_update_request_bio(struct request *rq,
+					       struct bio *bio)
 {
-	if (unlikely(blk_queue_may_bounce(q) && bio_has_data(bio)))
-		return __blk_queue_bounce(bio, q);
-	return bio;
+	/*
+	 * For zone append requests, the request sector indicates the location
+	 * at which the BIO data was written. Return this value to the BIO
	 * issuer through the BIO iter sector.
+	 * For plugged zone writes, which include emulated zone append, we need
+	 * the original BIO sector so that blk_zone_write_plug_bio_endio() can
+	 * lookup the zone write plug.
+	 */
+	if (req_op(rq) == REQ_OP_ZONE_APPEND ||
+	    bio_flagged(bio, BIO_EMULATES_ZONE_APPEND))
+		bio->bi_iter.bi_sector = rq->__sector;
+}
+void blk_zone_write_plug_bio_endio(struct bio *bio);
+static inline void blk_zone_bio_endio(struct bio *bio)
+{
+	/*
+	 * For write BIOs to zoned devices, signal the completion of the BIO so
+	 * that the next write BIO can be submitted by zone write plugging.
+	 */
+	if (bio_zone_write_plugging(bio))
+		blk_zone_write_plug_bio_endio(bio);
 }
 
-#ifdef CONFIG_BLK_DEV_ZONED
-void disk_free_zone_bitmaps(struct gendisk *disk);
+void blk_zone_write_plug_finish_request(struct request *rq);
+static inline void blk_zone_finish_request(struct request *rq)
+{
+	if (rq->rq_flags & RQF_ZONE_WRITE_PLUGGING)
+		blk_zone_write_plug_finish_request(rq);
+}
 int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd,
 		unsigned long arg);
 int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
 		unsigned int cmd, unsigned long arg);
 #else /* CONFIG_BLK_DEV_ZONED */
-static inline void disk_free_zone_bitmaps(struct gendisk *disk) {}
+static inline void disk_init_zone_resources(struct gendisk *disk)
+{
+}
+static inline void disk_free_zone_resources(struct gendisk *disk)
+{
+}
+static inline bool bio_zone_write_plugging(struct bio *bio)
+{
+	return false;
+}
+static inline void blk_zone_write_plug_bio_merged(struct bio *bio)
+{
+}
+static inline void blk_zone_write_plug_init_request(struct request *rq)
+{
+}
+static inline void blk_zone_update_request_bio(struct request *rq,
+					       struct bio *bio)
+{
+}
+static inline void blk_zone_bio_endio(struct bio *bio)
+{
+}
+static inline void blk_zone_finish_request(struct request *rq)
+{
+}
 static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
 		unsigned int cmd, unsigned long arg)
 {
@@ -428,12 +546,15 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
 
 struct block_device *bdev_alloc(struct gendisk *disk, u8 partno);
 void bdev_add(struct block_device *bdev, dev_t dev);
+void bdev_unhash(struct block_device *bdev);
+void bdev_drop(struct block_device *bdev);
 int blk_alloc_ext_minor(void);
 void blk_free_ext_minor(unsigned int minor);
 
 #define ADDPART_FLAG_NONE	0
 #define ADDPART_FLAG_RAID	1
 #define ADDPART_FLAG_WHOLEDISK	2
+#define ADDPART_FLAG_READONLY	4
 int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
 		sector_t length);
 int bdev_del_partition(struct gendisk *disk, int partno);
@@ -446,10 +567,6 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors);
 struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
 		struct lock_class_key *lkclass);
 
-int bio_add_hw_page(struct request_queue *q, struct bio *bio,
-		struct page *page, unsigned int len, unsigned int offset,
-		unsigned int max_sectors, bool *same_page);
-
 /*
  * Clean up a page appropriately, where the page may be pinned, may have a
  * ref taken on it or neither.
@@ -481,6 +598,7 @@ blk_mode_t file_to_blk_mode(struct file *file);
 int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
 		loff_t lstart, loff_t lend);
 long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
+int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
 long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 
 extern const struct address_space_operations def_blk_aops;
@@ -600,4 +718,34 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
 		const struct blk_holder_ops *hops, struct file *bdev_file);
 int bdev_permission(dev_t dev, blk_mode_t mode, void *holder);
 
+void blk_integrity_generate(struct bio *bio);
+void blk_integrity_verify_iter(struct bio *bio, struct bvec_iter *saved_iter);
+void blk_integrity_prepare(struct request *rq);
+void blk_integrity_complete(struct request *rq, unsigned int nr_bytes);
+
+#ifdef CONFIG_LOCKDEP
+static inline void blk_freeze_acquire_lock(struct request_queue *q)
+{
+	if (!q->mq_freeze_disk_dead)
+		rwsem_acquire(&q->io_lockdep_map, 0, 1, _RET_IP_);
+	if (!q->mq_freeze_queue_dying)
+		rwsem_acquire(&q->q_lockdep_map, 0, 1, _RET_IP_);
+}
+
+static inline void blk_unfreeze_release_lock(struct request_queue *q)
+{
+	if (!q->mq_freeze_queue_dying)
+		rwsem_release(&q->q_lockdep_map, _RET_IP_);
+	if (!q->mq_freeze_disk_dead)
+		rwsem_release(&q->io_lockdep_map, _RET_IP_);
+}
+#else
+static inline void blk_freeze_acquire_lock(struct request_queue *q)
+{
+}
+static inline void blk_unfreeze_release_lock(struct request_queue *q)
+{
+}
+#endif
+
 #endif /* BLK_INTERNAL_H */
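The new get_max_segment_size() helper in this diff clamps a candidate segment to both the queue's segment boundary mask and its max_segment_size, subtracting 1 before taking the minimum and adding 1 afterwards to avoid overflow when seg_boundary_mask is ULONG_MAX. The standalone C sketch below reproduces only that arithmetic outside the kernel; the queue_limits_example struct and the sample limit values are illustrative assumptions, not part of the patch.

/*
 * Userspace sketch of the segment clamping arithmetic from
 * get_max_segment_size(); the struct and limits below are assumptions.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t phys_addr_t;

struct queue_limits_example {
	unsigned long seg_boundary_mask;	/* e.g. 0xffffffff: 4 GiB boundary */
	unsigned int  max_segment_size;		/* e.g. 65536 bytes */
};

/*
 * Maximum number of bytes that can be added to one segment starting at
 * @paddr, limited by both the segment boundary and max_segment_size.
 * The "- 1 ... + 1" ordering avoids overflowing the type when the
 * boundary mask is ULONG_MAX and @paddr is already aligned.
 */
static unsigned int max_segment_size_sketch(const struct queue_limits_example *lim,
					    phys_addr_t paddr, unsigned int len)
{
	unsigned long boundary_left = lim->seg_boundary_mask -
			(lim->seg_boundary_mask & paddr);
	unsigned long max_seg = (unsigned long)lim->max_segment_size - 1;
	unsigned long allowed = (boundary_left < max_seg ? boundary_left : max_seg) + 1;

	return len < allowed ? len : (unsigned int)allowed;
}

int main(void)
{
	struct queue_limits_example lim = {
		.seg_boundary_mask = 0xffffffffUL,
		.max_segment_size  = 65536,
	};

	/* 4096 bytes below a boundary: clamped to 4096, not 64 KiB. */
	printf("%u\n", max_segment_size_sketch(&lim, 0xfffff000ULL, 1 << 20));
	/* Far from a boundary: clamped to max_segment_size (65536). */
	printf("%u\n", max_segment_size_sketch(&lim, 0x100000ULL, 1 << 20));
	return 0;
}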