diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-11 11:43:44 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-11 11:43:44 -0700 |
commit | 1ddeeb2a058d7b2a58ed9e820396b4ceb715d529 (patch) | |
tree | 32a27b8eb1c538239b641292d77dc1a8cee8ee97 /include | |
parent | d2c84bdce25a678c1e1f116d65b58790bd241af0 (diff) | |
parent | 5205a4aa8fc9454853b705b69611c80e9c644283 (diff) |
Merge tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- MD pull requests via Song:
- Cleanup redundant checks (Yu Kuai)
- Remove deprecated headers (Marc Zyngier, Song Liu)
- Concurrency fixes (Li Lingfeng)
- Memory leak fix (Li Nan)
- Refactor raid1 read_balance (Yu Kuai, Paul Luse)
- Clean up and fix for md_ioctl (Li Nan)
- Other small fixes (Gui-Dong Han, Heming Zhao)
- MD atomic limits (Christoph)
- NVMe pull request via Keith:
- RDMA target enhancements (Max)
- Fabrics fixes (Max, Guixin, Hannes)
- Atomic queue_limits usage (Christoph)
- Const use for class_register (Ricardo)
- Identification error handling fixes (Shin'ichiro, Keith)
- Improvement and cleanup for cached request handling (Christoph)
- Moving towards atomic queue limits. Core changes and driver bits so
far (Christoph)
- Fix UAF issues in aoeblk (Chun-Yi)
- Zoned fix and cleanups (Damien)
- s390 dasd cleanups and fixes (Jan, Miroslav)
- Block issue timestamp caching (me)
- noio scope guarding for zoned IO (Johannes)
- block/nvme PI improvements (Kanchan)
- Ability to terminate long running discard loop (Keith)
- bdev revalidation fix (Li)
- Get rid of old nr_queues hack for kdump kernels (Ming)
- Support for async deletion of ublk (Ming)
- Improve IRQ bio recycling (Pavel)
- Factor in CPU capacity for remote vs local completion (Qais)
- Add shared_tags configfs entry for null_blk (Shin'ichiro
- Fix for a regression in page refcounts introduced by the folio
unification (Tony)
- Misc fixes and cleanups (Arnd, Colin, John, Kunwu, Li, Navid,
Ricardo, Roman, Tang, Uwe)
* tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux: (221 commits)
block: partitions: only define function mac_fix_string for CONFIG_PPC_PMAC
block/swim: Convert to platform remove callback returning void
cdrom: gdrom: Convert to platform remove callback returning void
block: remove disk_stack_limits
md: remove mddev->queue
md: don't initialize queue limits
md/raid10: use the atomic queue limit update APIs
md/raid5: use the atomic queue limit update APIs
md/raid1: use the atomic queue limit update APIs
md/raid0: use the atomic queue limit update APIs
md: add queue limit helpers
md: add a mddev_is_dm helper
md: add a mddev_add_trace_msg helper
md: add a mddev_trace_remap helper
bcache: move calculation of stripe_size and io_opt into bcache_device_init
virtio_blk: Do not use disk_set_max_open/active_zones()
aoe: fix the potential use-after-free problem in aoecmd_cfg_pkts
block: move capacity validation to blkpg_do_ioctl()
block: prevent division by zero in blk_rq_stat_sum()
drbd: atomically update queue limits in drbd_reconsider_queue_parameters
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/blk-integrity.h | 1 | ||||
-rw-r--r-- | include/linux/blk-mq.h | 10 | ||||
-rw-r--r-- | include/linux/blk_types.h | 42 | ||||
-rw-r--r-- | include/linux/blkdev.h | 73 | ||||
-rw-r--r-- | include/linux/nvme-rdma.h | 6 | ||||
-rw-r--r-- | include/linux/nvme.h | 2 | ||||
-rw-r--r-- | include/linux/sched.h | 2 | ||||
-rw-r--r-- | include/linux/sched/topology.h | 6 | ||||
-rw-r--r-- | include/uapi/linux/ublk_cmd.h | 2 |
9 files changed, 81 insertions, 63 deletions
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 378b2459efe2..e253e7bd0d17 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -20,6 +20,7 @@ struct blk_integrity_iter { unsigned int data_size; unsigned short interval; unsigned char tuple_size; + unsigned char pi_offset; const char *disk_name; }; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 492b0128b5d9..d3d8fd8e229b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -684,17 +684,19 @@ enum { #define BLK_MQ_NO_HCTX_IDX (-1U) -struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, +struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, + struct queue_limits *lim, void *queuedata, struct lock_class_key *lkclass); -#define blk_mq_alloc_disk(set, queuedata) \ +#define blk_mq_alloc_disk(set, lim, queuedata) \ ({ \ static struct lock_class_key __key; \ \ - __blk_mq_alloc_disk(set, queuedata, &__key); \ + __blk_mq_alloc_disk(set, lim, queuedata, &__key); \ }) struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct lock_class_key *lkclass); -struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); +struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, + struct queue_limits *lim, void *queuedata); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); void blk_mq_destroy_queue(struct request_queue *); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 12d87cef2c03..cb1526ec44b5 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -207,52 +207,10 @@ static inline bool blk_path_error(blk_status_t error) return true; } -/* - * From most significant bit: - * 1 bit: reserved for other usage, see below - * 12 bits: original size of bio - * 51 bits: issue time of bio - */ -#define BIO_ISSUE_RES_BITS 1 -#define BIO_ISSUE_SIZE_BITS 12 -#define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS) -#define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS) -#define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1) -#define BIO_ISSUE_SIZE_MASK \ - (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT) -#define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1)) - -/* Reserved bit for blk-throtl */ -#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63) - struct bio_issue { u64 value; }; -static inline u64 __bio_issue_time(u64 time) -{ - return time & BIO_ISSUE_TIME_MASK; -} - -static inline u64 bio_issue_time(struct bio_issue *issue) -{ - return __bio_issue_time(issue->value); -} - -static inline sector_t bio_issue_size(struct bio_issue *issue) -{ - return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT); -} - -static inline void bio_issue_init(struct bio_issue *issue, - sector_t size) -{ - size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1; - issue->value = ((issue->value & BIO_ISSUE_RES_MASK) | - (ktime_get_ns() & BIO_ISSUE_TIME_MASK) | - ((u64)size << BIO_ISSUE_SIZE_SHIFT)); -} - typedef __u32 __bitwise blk_opf_t; typedef unsigned int blk_qc_t; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2f5dbde23094..f9b87c39cab0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -43,7 +43,7 @@ struct blk_crypto_profile; extern const struct device_type disk_type; extern const struct device_type part_type; -extern struct class block_class; +extern const struct class block_class; /* * Maximum number of blkcg policies allowed to be registered concurrently. @@ -109,6 +109,7 @@ struct blk_integrity { const struct blk_integrity_profile *profile; unsigned char flags; unsigned char tuple_size; + unsigned char pi_offset; unsigned char interval_exp; unsigned char tag_size; }; @@ -190,8 +191,6 @@ struct gendisk { * blk_mq_unfreeze_queue(). */ unsigned int nr_zones; - unsigned int max_open_zones; - unsigned int max_active_zones; unsigned long *conv_zones_bitmap; unsigned long *seq_zones_wlock; #endif /* CONFIG_BLK_DEV_ZONED */ @@ -293,6 +292,7 @@ struct queue_limits { unsigned int io_opt; unsigned int max_discard_sectors; unsigned int max_hw_discard_sectors; + unsigned int max_user_discard_sectors; unsigned int max_secure_erase_sectors; unsigned int max_write_zeroes_sectors; unsigned int max_zone_append_sectors; @@ -308,6 +308,8 @@ struct queue_limits { unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; bool zoned; + unsigned int max_open_zones; + unsigned int max_active_zones; /* * Drivers that set dma_alignment to less than 511 must be prepared to @@ -326,7 +328,7 @@ void disk_set_zoned(struct gendisk *disk); int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, - sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); + sector_t sectors, sector_t nr_sectors); int blk_revalidate_disk_zones(struct gendisk *disk, void (*update_driver_data)(struct gendisk *disk)); @@ -474,6 +476,7 @@ struct request_queue { struct mutex sysfs_lock; struct mutex sysfs_dir_lock; + struct mutex limits_lock; /* * for reusing dead hctx instance in case of updating @@ -640,23 +643,23 @@ static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector) static inline void disk_set_max_open_zones(struct gendisk *disk, unsigned int max_open_zones) { - disk->max_open_zones = max_open_zones; + disk->queue->limits.max_open_zones = max_open_zones; } static inline void disk_set_max_active_zones(struct gendisk *disk, unsigned int max_active_zones) { - disk->max_active_zones = max_active_zones; + disk->queue->limits.max_active_zones = max_active_zones; } static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { - return bdev->bd_disk->max_open_zones; + return bdev->bd_disk->queue->limits.max_open_zones; } static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { - return bdev->bd_disk->max_active_zones; + return bdev->bd_disk->queue->limits.max_active_zones; } #else /* CONFIG_BLK_DEV_ZONED */ @@ -764,22 +767,26 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) int bdev_disk_changed(struct gendisk *disk, bool invalidate); void put_disk(struct gendisk *disk); -struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); +struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node, + struct lock_class_key *lkclass); /** * blk_alloc_disk - allocate a gendisk structure + * @lim: queue limits to be used for this disk. * @node_id: numa node to allocate on * * Allocate and pre-initialize a gendisk structure for use with BIO based * drivers. * + * Returns an ERR_PTR on error, else the allocated disk. + * * Context: can sleep */ -#define blk_alloc_disk(node_id) \ +#define blk_alloc_disk(lim, node_id) \ ({ \ static struct lock_class_key __key; \ \ - __blk_alloc_disk(node_id, &__key); \ + __blk_alloc_disk(lim, node_id, &__key); \ }) int __register_blkdev(unsigned int major, const char *name, @@ -862,6 +869,29 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset, return chunk_sectors - (offset & (chunk_sectors - 1)); } +/** + * queue_limits_start_update - start an atomic update of queue limits + * @q: queue to update + * + * This functions starts an atomic update of the queue limits. It takes a lock + * to prevent other updates and returns a snapshot of the current limits that + * the caller can modify. The caller must call queue_limits_commit_update() + * to finish the update. + * + * Context: process context. The caller must have frozen the queue or ensured + * that there is outstanding I/O by other means. + */ +static inline struct queue_limits +queue_limits_start_update(struct request_queue *q) + __acquires(q->limits_lock) +{ + mutex_lock(&q->limits_lock); + return q->limits; +} +int queue_limits_commit_update(struct request_queue *q, + struct queue_limits *lim); +int queue_limits_set(struct request_queue *q, struct queue_limits *lim); + /* * Access functions for manipulating queue properties */ @@ -895,8 +925,8 @@ extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); -extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, - sector_t offset); +void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, + sector_t offset, const char *pfx); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); @@ -943,6 +973,7 @@ struct blk_plug { /* if ios_left is > 1, we can batch tag/rq allocations */ struct request *cached_rq; + u64 cur_ktime; unsigned short nr_ios; unsigned short rq_count; @@ -973,6 +1004,18 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) __blk_flush_plug(plug, async); } +/* + * tsk == current here + */ +static inline void blk_plug_invalidate_ts(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + + if (plug) + plug->cur_ktime = 0; + current->flags &= ~PF_BLOCK_TS; +} + int blkdev_issue_flush(struct block_device *bdev); long nr_blockdev_pages(void); #else /* CONFIG_BLOCK */ @@ -996,6 +1039,10 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) { } +static inline void blk_plug_invalidate_ts(struct task_struct *tsk) +{ +} + static inline int blkdev_issue_flush(struct block_device *bdev) { return 0; diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index 4dd7e6fe92fb..eb2f04d636c8 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -6,7 +6,11 @@ #ifndef _LINUX_NVME_RDMA_H #define _LINUX_NVME_RDMA_H -#define NVME_RDMA_MAX_QUEUE_SIZE 128 +#define NVME_RDMA_IP_PORT 4420 + +#define NVME_RDMA_MAX_QUEUE_SIZE 256 +#define NVME_RDMA_MAX_METADATA_QUEUE_SIZE 128 +#define NVME_RDMA_DEFAULT_QUEUE_SIZE 128 enum nvme_rdma_cm_fmt { NVME_RDMA_CM_FMT_1_0 = 0x0, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 3ef4053ea950..425573202295 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -23,8 +23,6 @@ #define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery" -#define NVME_RDMA_IP_PORT 4420 - #define NVME_NSID_ALL 0xffffffff enum nvme_subsys_type { diff --git a/include/linux/sched.h b/include/linux/sched.h index ffe8f618ab86..15b7cb478d16 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1642,7 +1642,7 @@ extern struct pid *cad_pid; #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */ -#define PF__HOLE__20000000 0x20000000 +#define PF_BLOCK_TS 0x20000000 /* plug has ts that needs updating */ #define PF__HOLE__40000000 0x40000000 #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index a6e04b4a21d7..11e0e00e0bb9 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -176,6 +176,7 @@ extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], cpumask_var_t *alloc_sched_domains(unsigned int ndoms); void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); +bool cpus_equal_capacity(int this_cpu, int that_cpu); bool cpus_share_cache(int this_cpu, int that_cpu); bool cpus_share_resources(int this_cpu, int that_cpu); @@ -226,6 +227,11 @@ partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], { } +static inline bool cpus_equal_capacity(int this_cpu, int that_cpu) +{ + return true; +} + static inline bool cpus_share_cache(int this_cpu, int that_cpu) { return true; diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index b9cfc5c96268..c8dc5f8ea699 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -49,6 +49,8 @@ _IOR('u', UBLK_CMD_GET_DEV_INFO2, struct ublksrv_ctrl_cmd) #define UBLK_U_CMD_GET_FEATURES \ _IOR('u', 0x13, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_DEL_DEV_ASYNC \ + _IOR('u', 0x14, struct ublksrv_ctrl_cmd) /* * 64bits are enough now, and it should be easy to extend in case of |