diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-18 18:22:40 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-18 18:22:40 -0800 |
commit | 9d1694dc91ce7b80bc96d6d8eaf1a1eca668d847 (patch) | |
tree | d9093aecb9261cccaea1f0a58887fcd9db542172 /drivers/nvme/host | |
parent | e9a5a78d1ad8ceb4e3df6d6ad93360094c84ac40 (diff) | |
parent | b2e792ae883a0aa976d4176dfa7dc933263440ea (diff) |
Merge tag 'for-6.8/block-2024-01-18' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe:
- NVMe pull request via Keith:
- tcp, fc, and rdma target fixes (Maurizio, Daniel, Hannes,
Christoph)
- discard fixes and improvements (Christoph)
- timeout debug improvements (Keith, Max)
- various cleanups (Daniel, Max, Giuxen)
- trace event string fixes (Arnd)
- shadow doorbell setup on reset fix (William)
- a write zeroes quirk for SK Hynix (Jim)
- MD pull request via Song:
- Sparse warning since v6.0 (Bart)
- /proc/mdstat regression since v6.7 (Yu Kuai)
- Use symbolic error value (Christian)
- IO Priority documentation update (Christian)
- Fix for accessing queue limits without having entered the queue
(Christoph, me)
- Fix for loop dio support (Christoph)
- Move null_blk off deprecated ida interface (Christophe)
- Ensure nbd initializes full msghdr (Eric)
- Fix for a regression with the folio conversion, which is now easier
to hit because of an unrelated change (Matthew)
- Remove redundant check in virtio-blk (Li)
- Fix for a potential hang in sbitmap (Ming)
- Fix for partial zone appending (Damien)
- Misc changes and fixes (Bart, me, Kemeng, Dmitry)
* tag 'for-6.8/block-2024-01-18' of git://git.kernel.dk/linux: (45 commits)
Documentation: block: ioprio: Update schedulers
loop: fix the the direct I/O support check when used on top of block devices
blk-mq: Remove the hctx 'run' debugfs attribute
nbd: always initialize struct msghdr completely
block: Fix iterating over an empty bio with bio_for_each_folio_all
block: bio-integrity: fix kcalloc() arguments order
virtio_blk: remove duplicate check if queue is broken in virtblk_done
sbitmap: remove stale comment in sbq_calc_wake_batch
block: Correct a documentation comment in blk-cgroup.c
null_blk: Remove usage of the deprecated ida_simple_xx() API
block: ensure we hold a queue reference when using queue limits
blk-mq: rename blk_mq_can_use_cached_rq
block: print symbolic error name instead of error code
blk-mq: fix IO hang from sbitmap wakeup race
nvmet-rdma: avoid circular locking dependency on install_queue()
nvmet-tcp: avoid circular locking dependency on install_queue()
nvme-pci: set doorbell config before unquiescing
block: fix partial zone append completion handling in req_bio_endio()
block/iocost: silence warning on 'last_period' potentially being unused
md/raid1: Use blk_opf_t for read and write operations
...
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- | drivers/nvme/host/core.c | 41 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 16 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 27 | ||||
-rw-r--r-- | drivers/nvme/host/pr.c | 2 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 11 | ||||
-rw-r--r-- | drivers/nvme/host/sysfs.c | 8 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 11 |
7 files changed, 67 insertions, 49 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0af612387083..85ab0fcf9e88 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1740,13 +1740,13 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, struct nvme_ns_head *head) { struct request_queue *queue = disk->queue; - u32 size = queue_logical_block_size(queue); + u32 max_discard_sectors; - if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) - ctrl->max_discard_sectors = - nvme_lba_to_sect(head, ctrl->dmrsl); - - if (ctrl->max_discard_sectors == 0) { + if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) { + max_discard_sectors = nvme_lba_to_sect(head, ctrl->dmrsl); + } else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) { + max_discard_sectors = UINT_MAX; + } else { blk_queue_max_discard_sectors(queue, 0); return; } @@ -1754,14 +1754,22 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - queue->limits.discard_granularity = size; - - /* If discard is already enabled, don't reset queue limits */ + /* + * If discard is already enabled, don't reset queue limits. + * + * This works around the fact that the block layer can't cope well with + * updating the hardware limits when overridden through sysfs. This is + * harmless because discard limits in NVMe are purely advisory. + */ if (queue->limits.max_discard_sectors) return; - blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors); - blk_queue_max_discard_segments(queue, ctrl->max_discard_segments); + blk_queue_max_discard_sectors(queue, max_discard_sectors); + if (ctrl->dmrl) + blk_queue_max_discard_segments(queue, ctrl->dmrl); + else + blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); + queue->limits.discard_granularity = queue_logical_block_size(queue); if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); @@ -2930,14 +2938,6 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) struct nvme_id_ctrl_nvm *id; int ret; - if (ctrl->oncs & NVME_CTRL_ONCS_DSM) { - ctrl->max_discard_sectors = UINT_MAX; - ctrl->max_discard_segments = NVME_DSM_MAX_RANGES; - } else { - ctrl->max_discard_sectors = 0; - ctrl->max_discard_segments = 0; - } - /* * Even though NVMe spec explicitly states that MDTS is not applicable * to the write-zeroes, we are cautious and limit the size to the @@ -2967,8 +2967,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) if (ret) goto free_data; - if (id->dmrl) - ctrl->max_discard_segments = id->dmrl; + ctrl->dmrl = id->dmrl; ctrl->dmrsl = le32_to_cpu(id->dmrsl); if (id->wzsl) ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 4be7f6822966..030c80818240 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -303,14 +303,13 @@ struct nvme_ctrl { u32 max_hw_sectors; u32 max_segments; u32 max_integrity_segments; - u32 max_discard_sectors; - u32 max_discard_segments; u32 max_zeroes_sectors; #ifdef CONFIG_BLK_DEV_ZONED u32 max_zone_append; #endif u16 crdt[3]; u16 oncs; + u8 dmrl; u32 dmrsl; u16 oacs; u16 sqsize; @@ -932,6 +931,10 @@ extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; extern struct device_attribute subsys_attr_iopolicy; +static inline bool nvme_disk_is_ns_head(struct gendisk *disk) +{ + return disk->fops == &nvme_ns_head_ops; +} #else #define multipath false static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) @@ -1009,6 +1012,10 @@ static inline void nvme_mpath_start_request(struct request *rq) static inline void nvme_mpath_end_request(struct request *rq) { } +static inline bool nvme_disk_is_ns_head(struct gendisk *disk) +{ + return false; +} #endif /* CONFIG_NVME_MULTIPATH */ int nvme_revalidate_zones(struct nvme_ns *ns); @@ -1037,7 +1044,10 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) { - return dev_to_disk(dev)->private_data; + struct gendisk *disk = dev_to_disk(dev); + + WARN_ON(nvme_disk_is_ns_head(disk)); + return disk->private_data; } #ifdef CONFIG_NVME_HWMON diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 61af7ff1a9d6..c1d6357ec98a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1284,6 +1284,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) struct request *abort_req; struct nvme_command cmd = { }; u32 csts = readl(dev->bar + NVME_REG_CSTS); + u8 opcode; /* If PCI error recovery process is happening, we cannot reset or * the recovery mechanism will surely fail. @@ -1310,8 +1311,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) { dev_warn(dev->ctrl.device, - "I/O %d QID %d timeout, completion polled\n", - req->tag, nvmeq->qid); + "I/O tag %d (%04x) QID %d timeout, completion polled\n", + req->tag, nvme_cid(req), nvmeq->qid); return BLK_EH_DONE; } @@ -1327,8 +1328,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) fallthrough; case NVME_CTRL_DELETING: dev_warn_ratelimited(dev->ctrl.device, - "I/O %d QID %d timeout, disable controller\n", - req->tag, nvmeq->qid); + "I/O tag %d (%04x) QID %d timeout, disable controller\n", + req->tag, nvme_cid(req), nvmeq->qid); nvme_req(req)->flags |= NVME_REQ_CANCELLED; nvme_dev_disable(dev, true); return BLK_EH_DONE; @@ -1343,10 +1344,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) * command was already aborted once before and still hasn't been * returned to the driver, or if this is the admin queue. */ + opcode = nvme_req(req)->cmd->common.opcode; if (!nvmeq->qid || iod->aborted) { dev_warn(dev->ctrl.device, - "I/O %d QID %d timeout, reset controller\n", - req->tag, nvmeq->qid); + "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n", + req->tag, nvme_cid(req), opcode, + nvme_opcode_str(nvmeq->qid, opcode, 0), nvmeq->qid); nvme_req(req)->flags |= NVME_REQ_CANCELLED; goto disable; } @@ -1362,10 +1365,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) cmd.abort.sqid = cpu_to_le16(nvmeq->qid); dev_warn(nvmeq->dev->ctrl.device, - "I/O %d (%s) QID %d timeout, aborting\n", - req->tag, - nvme_get_opcode_str(nvme_req(req)->cmd->common.opcode), - nvmeq->qid); + "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, aborting req_op:%s(%u) size:%u\n", + req->tag, nvme_cid(req), opcode, nvme_get_opcode_str(opcode), + nvmeq->qid, blk_op_str(req_op(req)), req_op(req), + blk_rq_bytes(req)); abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd), BLK_MQ_REQ_NOWAIT); @@ -2743,10 +2746,10 @@ static void nvme_reset_work(struct work_struct *work) * controller around but remove all namespaces. */ if (dev->online_queues > 1) { + nvme_dbbuf_set(dev); nvme_unquiesce_io_queues(&dev->ctrl); nvme_wait_freeze(&dev->ctrl); nvme_pci_update_nr_queues(dev); - nvme_dbbuf_set(dev); nvme_unfreeze(&dev->ctrl); } else { dev_warn(dev->ctrl.device, "IO queues lost\n"); @@ -3408,6 +3411,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1c5c, 0x174a), /* SK Hynix P31 SSD */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1c5c, 0x1D59), /* SK Hynix BC901 */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1d97, 0x2263), /* SPCC */ diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index 391b1465ebfd..fc3eed00f9ff 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -98,7 +98,7 @@ static int nvme_send_pr_command(struct block_device *bdev, struct nvme_command *c, void *data, unsigned int data_len) { if (IS_ENABLED(CONFIG_NVME_MULTIPATH) && - bdev->bd_disk->fops == &nvme_ns_head_ops) + nvme_disk_is_ns_head(bdev->bd_disk)) return nvme_send_ns_head_pr_command(bdev, c, data, data_len); return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, data, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c89503da24d7..11dde0d83044 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1946,9 +1946,14 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; struct nvme_rdma_ctrl *ctrl = queue->ctrl; - - dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n", - rq->tag, nvme_rdma_queue_idx(queue)); + u8 opcode = req->req.cmd->common.opcode; + u8 fctype = req->req.cmd->fabrics.fctype; + int qid = nvme_rdma_queue_idx(queue); + + dev_warn(ctrl->ctrl.device, + "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n", + rq->tag, nvme_cid(rq), opcode, + nvme_opcode_str(qid, opcode, fctype), qid); if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) { /* diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index ac24ad102380..754e91111042 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -39,10 +39,9 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) { struct gendisk *disk = dev_to_disk(dev); - if (disk->fops == &nvme_bdev_ops) - return nvme_get_ns_from_dev(dev)->head; - else + if (nvme_disk_is_ns_head(disk)) return disk->private_data; + return nvme_get_ns_from_dev(dev)->head; } static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, @@ -233,7 +232,8 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, } #ifdef CONFIG_NVME_MULTIPATH if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) { - if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */ + /* per-path attr */ + if (nvme_disk_is_ns_head(dev_to_disk(dev))) return 0; if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl)) return 0; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 08805f027810..d058d990532b 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1922,14 +1922,13 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) ctrl->opts->subsysnqn); if (!pskid) { dev_err(ctrl->device, "no valid PSK found\n"); - ret = -ENOKEY; - goto out_free_queue; + return -ENOKEY; } } ret = nvme_tcp_alloc_queue(ctrl, 0, pskid); if (ret) - goto out_free_queue; + return ret; ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl)); if (ret) @@ -2433,9 +2432,9 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) int qid = nvme_tcp_queue_id(req->queue); dev_warn(ctrl->device, - "queue %d: timeout cid %#x type %d opcode %#x (%s)\n", - nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type, - opc, nvme_opcode_str(qid, opc, fctype)); + "I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n", + rq->tag, nvme_cid(rq), pdu->hdr.type, opc, + nvme_opcode_str(qid, opc, fctype), qid); if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) { /* |