summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2025-01-20 19:38:46 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2025-01-20 19:38:46 -0800
commit: 1cbfb828e05171ca2dd77b5988d068e6872480fe (patch)
tree: bfb33c9ad8840908058649ba2e261bdb7e5f7ee9 /include
parent: 3d3a9c8b89d4f8a3785e06ffd15405c670696f02 (diff)
parent: 554b22864cc79e28cd65e3a6e1d0d1dfa8581c68 (diff)
Merge tag 'for-6.14/block-20250118' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:

 - NVMe pull requests via Keith:
      - Target support for PCI-Endpoint transport (Damien)
      - TCP IO queue spreading fixes (Sagi, Chaitanya)
      - Target handling for "limited retry" flags (Guixen)
      - Poll type fix (Yongsoo)
      - Xarray storage error handling (Keisuke)
      - Host memory buffer free size fix on error (Francis)

 - MD pull requests via Song:
      - Reintroduce md-linear (Yu Kuai)
      - md-bitmap refactor and fix (Yu Kuai)
      - Replace kmap_atomic with kmap_local_page (David Reaver)

 - Quite a few queue freeze and debugfs deadlock fixes

   Ming introduced lockdep support for this in the 6.13 kernel, and it
   has (unsurprisingly) uncovered quite a few issues

 - Use const attributes for IO schedulers

 - Remove bio ioprio wrappers

 - Fixes for stacked device atomic write support

 - Refactor queue affinity helpers, in preparation for better supporting
   isolated CPUs

 - Cleanups of loop O_DIRECT handling

 - Cleanup of BLK_MQ_F_* flags

 - Add rotational support for null_blk

 - Various fixes and cleanups

* tag 'for-6.14/block-20250118' of git://git.kernel.dk/linux: (106 commits)
  block: Don't trim an atomic write
  block: Add common atomic writes enable flag
  md/md-linear: Fix a NULL vs IS_ERR() bug in linear_add()
  block: limit disk max sectors to (LLONG_MAX >> 9)
  block: Change blk_stack_atomic_writes_limits() unit_min check
  block: Ensure start sector is aligned for stacking atomic writes
  blk-mq: Move more error handling into blk_mq_submit_bio()
  block: Reorder the request allocation code in blk_mq_submit_bio()
  nvme: fix bogus kzalloc() return check in nvme_init_effects_log()
  md/md-bitmap: move bitmap_{start, end}write to md upper layer
  md/raid5: implement pers->bitmap_sector()
  md: add a new callback pers->bitmap_sector()
  md/md-bitmap: remove the last parameter for bimtap_ops->endwrite()
  md/md-bitmap: factor behind write counters out from bitmap_{start/end}write()
  md: Replace deprecated kmap_atomic() with kmap_local_page()
  md: reintroduce md-linear
  partitions: ldm: remove the initial kernel-doc notation
  blk-cgroup: rwstat: fix kernel-doc warnings in header file
  blk-cgroup: fix kernel-doc warnings in header file
  nbd: fix partial sending
  ...
Diffstat (limited to 'include')
-rw-r--r-- include/linux/bio.h | 5
-rw-r--r-- include/linux/blk-mq-pci.h | 11
-rw-r--r-- include/linux/blk-mq-virtio.h | 11
-rw-r--r-- include/linux/blk-mq.h | 35
-rw-r--r-- include/linux/blkdev.h | 36
-rw-r--r-- include/linux/bvec.h | 7
-rw-r--r-- include/linux/device/bus.h | 3
-rw-r--r-- include/linux/libata.h | 4
-rw-r--r-- include/linux/nvme.h | 42
-rw-r--r-- include/scsi/scsi_host.h | 6
-rw-r--r-- include/uapi/linux/raid/md_p.h | 2
-rw-r--r-- include/uapi/linux/raid/md_u.h | 2
12 files changed, 87 insertions, 77 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7a1b3b1a8fed..4b79bf50f4f0 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -19,9 +19,6 @@ static inline unsigned int bio_max_segs(unsigned int nr_segs)
return min(nr_segs, BIO_MAX_VECS);
}
-#define bio_prio(bio) (bio)->bi_ioprio
-#define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio)
-
#define bio_iter_iovec(bio, iter) \
bvec_iter_bvec((bio)->bi_io_vec, (iter))
@@ -416,8 +413,6 @@ int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len,
unsigned off);
bool __must_check bio_add_folio(struct bio *bio, struct folio *folio,
size_t len, size_t off);
-extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
- unsigned int, unsigned int);
void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h
deleted file mode 100644
index ca544e1d3508..000000000000
--- a/include/linux/blk-mq-pci.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_BLK_MQ_PCI_H
-#define _LINUX_BLK_MQ_PCI_H
-
-struct blk_mq_queue_map;
-struct pci_dev;
-
-void blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev,
- int offset);
-
-#endif /* _LINUX_BLK_MQ_PCI_H */
diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h
deleted file mode 100644
index 13226e9b22dd..000000000000
--- a/include/linux/blk-mq-virtio.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_BLK_MQ_VIRTIO_H
-#define _LINUX_BLK_MQ_VIRTIO_H
-
-struct blk_mq_queue_map;
-struct virtio_device;
-
-void blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap,
- struct virtio_device *vdev, int first_vec);
-
-#endif /* _LINUX_BLK_MQ_VIRTIO_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c596e0e4cb75..a0a9007cc1e3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -296,13 +296,6 @@ enum blk_eh_timer_return {
BLK_EH_RESET_TIMER,
};
-/* Keep alloc_policy_name[] in sync with the definitions below */
-enum {
- BLK_TAG_ALLOC_FIFO, /* allocate starting from 0 */
- BLK_TAG_ALLOC_RR, /* allocate starting from last allocated tag */
- BLK_TAG_ALLOC_MAX
-};
-
/**
* struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
* block device
@@ -668,7 +661,6 @@ struct blk_mq_ops {
/* Keep hctx_flag_name[] in sync with the definitions below */
enum {
- BLK_MQ_F_SHOULD_MERGE = 1 << 0,
BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1,
/*
* Set when this device requires underlying blk-mq device for
@@ -677,23 +669,20 @@ enum {
BLK_MQ_F_STACKING = 1 << 2,
BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3,
BLK_MQ_F_BLOCKING = 1 << 4,
- /* Do not allow an I/O scheduler to be configured. */
- BLK_MQ_F_NO_SCHED = 1 << 5,
+
+ /*
+ * Alloc tags on a round-robin base instead of the first available one.
+ */
+ BLK_MQ_F_TAG_RR = 1 << 5,
/*
* Select 'none' during queue registration in case of a single hwq
* or shared hwqs instead of 'mq-deadline'.
*/
BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 6,
- BLK_MQ_F_ALLOC_POLICY_START_BIT = 7,
- BLK_MQ_F_ALLOC_POLICY_BITS = 1,
+
+ BLK_MQ_F_MAX = 1 << 7,
};
-#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
- ((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
- ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
-#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
- ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
- << BLK_MQ_F_ALLOC_POLICY_START_BIT)
#define BLK_MQ_MAX_DEPTH (10240)
#define BLK_MQ_NO_HCTX_IDX (-1U)
@@ -921,6 +910,8 @@ void blk_mq_unfreeze_queue_non_owner(struct request_queue *q);
void blk_freeze_queue_start_non_owner(struct request_queue *q);
void blk_mq_map_queues(struct blk_mq_queue_map *qmap);
+void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap,
+ struct device *dev, unsigned int offset);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
void blk_mq_quiesce_queue_nowait(struct request_queue *q);
@@ -977,14 +968,6 @@ static inline void blk_mq_cleanup_rq(struct request *rq)
rq->q->mq_ops->cleanup_rq(rq);
}
-static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
- unsigned int nr_segs)
-{
- rq->nr_phys_segments = nr_segs;
- rq->__data_len = bio->bi_iter.bi_size;
- rq->bio = rq->biotail = bio;
-}
-
void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
struct lock_class_key *key);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 378d3a1a22fc..76f0a4e7c2e5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -331,8 +331,8 @@ typedef unsigned int __bitwise blk_features_t;
#define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
((__force blk_features_t)(1u << 15))
-/* stacked device can/does support atomic writes */
-#define BLK_FEAT_ATOMIC_WRITES_STACKED \
+/* atomic writes enabled */
+#define BLK_FEAT_ATOMIC_WRITES \
((__force blk_features_t)(1u << 16))
/*
@@ -581,6 +581,12 @@ struct request_queue {
#ifdef CONFIG_LOCKDEP
struct task_struct *mq_freeze_owner;
int mq_freeze_owner_depth;
+ /*
+ * Records disk & queue state in current context, used in unfreeze
+ * queue
+ */
+ bool mq_freeze_disk_dead;
+ bool mq_freeze_queue_dying;
#endif
wait_queue_head_t mq_freeze_wq;
/*
@@ -938,8 +944,7 @@ static inline unsigned int blk_boundary_sectors_left(sector_t offset,
* the caller can modify. The caller must call queue_limits_commit_update()
* to finish the update.
*
- * Context: process context. The caller must have frozen the queue or ensured
- * that there is outstanding I/O by other means.
+ * Context: process context.
*/
static inline struct queue_limits
queue_limits_start_update(struct request_queue *q)
@@ -947,6 +952,8 @@ queue_limits_start_update(struct request_queue *q)
mutex_lock(&q->limits_lock);
return q->limits;
}
+int queue_limits_commit_update_frozen(struct request_queue *q,
+ struct queue_limits *lim);
int queue_limits_commit_update(struct request_queue *q,
struct queue_limits *lim);
int queue_limits_set(struct request_queue *q, struct queue_limits *lim);
@@ -1699,6 +1706,15 @@ struct io_comp_batch {
void (*complete)(struct io_comp_batch *);
};
+static inline bool blk_atomic_write_start_sect_aligned(sector_t sector,
+ struct queue_limits *limits)
+{
+ unsigned int alignment = max(limits->atomic_write_hw_unit_min,
+ limits->atomic_write_hw_boundary);
+
+ return IS_ALIGNED(sector, alignment >> SECTOR_SHIFT);
+}
+
static inline bool bdev_can_atomic_write(struct block_device *bdev)
{
struct request_queue *bd_queue = bdev->bd_queue;
@@ -1707,15 +1723,9 @@ static inline bool bdev_can_atomic_write(struct block_device *bdev)
if (!limits->atomic_write_unit_min)
return false;
- if (bdev_is_partition(bdev)) {
- sector_t bd_start_sect = bdev->bd_start_sect;
- unsigned int alignment =
- max(limits->atomic_write_unit_min,
- limits->atomic_write_hw_boundary);
-
- if (!IS_ALIGNED(bd_start_sect, alignment >> SECTOR_SHIFT))
- return false;
- }
+ if (bdev_is_partition(bdev))
+ return blk_atomic_write_start_sect_aligned(bdev->bd_start_sect,
+ limits);
return true;
}
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index f41c7f0ef91e..ba8f52d48b94 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -286,12 +286,7 @@ static inline void *bvec_virt(struct bio_vec *bvec)
*/
static inline phys_addr_t bvec_phys(const struct bio_vec *bvec)
{
- /*
- * Note this open codes page_to_phys because page_to_phys is defined in
- * <asm/io.h>, which we don't want to pull in here. If it ever moves to
- * a sensible place we should start using it.
- */
- return PFN_PHYS(page_to_pfn(bvec->bv_page)) + bvec->bv_offset;
+ return page_to_phys(bvec->bv_page) + bvec->bv_offset;
}
#endif /* __LINUX_BVEC_H */
diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index cdc4757217f9..b18658bce2c3 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -48,6 +48,7 @@ struct fwnode_handle;
* will never get called until they do.
* @remove: Called when a device removed from this bus.
* @shutdown: Called at shut-down time to quiesce the device.
+ * @irq_get_affinity: Get IRQ affinity mask for the device on this bus.
*
* @online: Called to put the device back online (after offlining it).
* @offline: Called to put the device offline for hot-removal. May fail.
@@ -87,6 +88,8 @@ struct bus_type {
void (*sync_state)(struct device *dev);
void (*remove)(struct device *dev);
void (*shutdown)(struct device *dev);
+ const struct cpumask *(*irq_get_affinity)(struct device *dev,
+ unsigned int irq_vec);
int (*online)(struct device *dev);
int (*offline)(struct device *dev);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c1a85d46eba6..be5183d75736 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1467,13 +1467,13 @@ extern const struct attribute_group *ata_common_sdev_groups[];
#define ATA_SUBBASE_SHT(drv_name) \
__ATA_BASE_SHT(drv_name), \
.can_queue = ATA_DEF_QUEUE, \
- .tag_alloc_policy = BLK_TAG_ALLOC_RR, \
+ .tag_alloc_policy_rr = true, \
.device_configure = ata_scsi_device_configure
#define ATA_SUBBASE_SHT_QD(drv_name, drv_qd) \
__ATA_BASE_SHT(drv_name), \
.can_queue = drv_qd, \
- .tag_alloc_policy = BLK_TAG_ALLOC_RR, \
+ .tag_alloc_policy_rr = true, \
.device_configure = ata_scsi_device_configure
#define ATA_BASE_SHT(drv_name) \
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 13377dde4527..fe3b60818fdc 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -64,6 +64,7 @@ enum {
/* Transport Type codes for Discovery Log Page entry TRTYPE field */
enum {
+ NVMF_TRTYPE_PCI = 0, /* PCI */
NVMF_TRTYPE_RDMA = 1, /* RDMA */
NVMF_TRTYPE_FC = 2, /* Fibre Channel */
NVMF_TRTYPE_TCP = 3, /* TCP/IP */
@@ -275,6 +276,7 @@ enum nvme_ctrl_attr {
NVME_CTRL_ATTR_HID_128_BIT = (1 << 0),
NVME_CTRL_ATTR_TBKAS = (1 << 6),
NVME_CTRL_ATTR_ELBAS = (1 << 15),
+ NVME_CTRL_ATTR_RHII = (1 << 18),
};
struct nvme_id_ctrl {
@@ -1896,6 +1898,46 @@ static inline bool nvme_is_fabrics(const struct nvme_command *cmd)
return cmd->common.opcode == nvme_fabrics_command;
}
+#ifdef CONFIG_NVME_VERBOSE_ERRORS
+const char *nvme_get_error_status_str(u16 status);
+const char *nvme_get_opcode_str(u8 opcode);
+const char *nvme_get_admin_opcode_str(u8 opcode);
+const char *nvme_get_fabrics_opcode_str(u8 opcode);
+#else /* CONFIG_NVME_VERBOSE_ERRORS */
+static inline const char *nvme_get_error_status_str(u16 status)
+{
+ return "I/O Error";
+}
+static inline const char *nvme_get_opcode_str(u8 opcode)
+{
+ return "I/O Cmd";
+}
+static inline const char *nvme_get_admin_opcode_str(u8 opcode)
+{
+ return "Admin Cmd";
+}
+
+static inline const char *nvme_get_fabrics_opcode_str(u8 opcode)
+{
+ return "Fabrics Cmd";
+}
+#endif /* CONFIG_NVME_VERBOSE_ERRORS */
+
+static inline const char *nvme_opcode_str(int qid, u8 opcode)
+{
+ return qid ? nvme_get_opcode_str(opcode) :
+ nvme_get_admin_opcode_str(opcode);
+}
+
+static inline const char *nvme_fabrics_opcode_str(
+ int qid, const struct nvme_command *cmd)
+{
+ if (nvme_is_fabrics(cmd))
+ return nvme_get_fabrics_opcode_str(cmd->fabrics.fctype);
+
+ return nvme_opcode_str(qid, cmd->common.opcode);
+}
+
struct nvme_error_slot {
__le64 error_count;
__le16 sqid;
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 2b4ab0369ffb..02823d6af37d 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -438,8 +438,10 @@ struct scsi_host_template {
*/
short cmd_per_lun;
- /* If use block layer to manage tags, this is tag allocation policy */
- int tag_alloc_policy;
+ /*
+ * Allocate tags starting from last allocated tag.
+ */
+ bool tag_alloc_policy_rr : 1;
/*
* Track QUEUE_FULL events and reduce queue depth on demand.
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index 5a43c23f53bf..ff47b6f0ba0f 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -233,7 +233,7 @@ struct mdp_superblock_1 {
char set_name[32]; /* set and interpreted by user-space */
__le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/
- __le32 level; /* 0,1,4,5 */
+ __le32 level; /* 0,1,4,5, -1 (linear) */
__le32 layout; /* only for raid5 and raid10 currently */
__le64 size; /* used size of component devices, in 512byte sectors */
diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h
index 7be89a4906e7..a893010735fb 100644
--- a/include/uapi/linux/raid/md_u.h
+++ b/include/uapi/linux/raid/md_u.h
@@ -103,6 +103,8 @@ typedef struct mdu_array_info_s {
} mdu_array_info_t;
+#define LEVEL_LINEAR (-1)
+
/* we need a value for 'no level specified' and 0
* means 'raid0', so we need something else. This is
* for internal use only