diff options
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/constants.c | 5 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 111 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.h | 8 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 26 | ||||
-rw-r--r-- | drivers/nvme/host/ioctl.c | 278 | ||||
-rw-r--r-- | drivers/nvme/host/multipath.c | 1 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 6 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 5 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 5 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 5 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd-bdev.c | 2 | ||||
-rw-r--r-- | drivers/nvme/target/zns.c | 3 |
12 files changed, 397 insertions, 58 deletions
diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index 7d49eb34b348..4910543f00ff 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -4,7 +4,6 @@ * Copyright (c) 2022, Oracle and/or its affiliates */ -#include <linux/blkdev.h> #include "nvme.h" #ifdef CONFIG_NVME_VERBOSE_ERRORS @@ -92,6 +91,7 @@ static const char * const nvme_statuses[] = { [NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected", [NVME_SC_CMD_INTERRUPTED] = "Command Interrupted", [NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error", + [NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY] = "Admin Command Media Not Ready", [NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set", [NVME_SC_LBA_RANGE] = "LBA Out of Range", [NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded", @@ -155,10 +155,13 @@ static const char * const nvme_statuses[] = { [NVME_SC_COMPARE_FAILED] = "Compare Failure", [NVME_SC_ACCESS_DENIED] = "Access Denied", [NVME_SC_UNWRITTEN_BLOCK] = "Deallocated or Unwritten Logical Block", + [NVME_SC_INTERNAL_PATH_ERROR] = "Internal Pathing Error", [NVME_SC_ANA_PERSISTENT_LOSS] = "Asymmetric Access Persistent Loss", [NVME_SC_ANA_INACCESSIBLE] = "Asymmetric Access Inaccessible", [NVME_SC_ANA_TRANSITION] = "Asymmetric Access Transition", + [NVME_SC_CTRL_PATH_ERROR] = "Controller Pathing Error", [NVME_SC_HOST_PATH_ERROR] = "Host Pathing Error", + [NVME_SC_HOST_ABORTED_CMD] = "Host Aborted Command", }; const unsigned char *nvme_get_error_status_str(u16 status) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e1846d04817f..72f7c955c707 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1207,6 +1207,7 @@ static void nvme_keep_alive_work(struct work_struct *work) rq->timeout = ctrl->kato * HZ; rq->end_io_data = ctrl; + rq->rq_flags |= RQF_QUIET; blk_execute_rq_nowait(rq, false, nvme_keep_alive_end_io); } @@ -1426,6 +1427,32 @@ out_free_id: return error; } +static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid, + struct nvme_id_ns_cs_indep **id) +{ + struct nvme_command c = { + .identify.opcode = nvme_admin_identify, + .identify.nsid = cpu_to_le32(nsid), + .identify.cns = NVME_ID_CNS_NS_CS_INDEP, + }; + int ret; + + *id = kmalloc(sizeof(**id), GFP_KERNEL); + if (!*id) + return -ENOMEM; + + ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id)); + if (ret) { + dev_warn(ctrl->device, + "Identify namespace (CS independent) failed (%d)\n", + ret); + kfree(*id); + return ret; + } + + return 0; +} + static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, unsigned int dword11, void *buffer, size_t buflen, u32 *result) { @@ -1621,20 +1648,22 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) u32 size = queue_logical_block_size(queue); if (ctrl->max_discard_sectors == 0) { - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue); + blk_queue_max_discard_sectors(queue, 0); return; } BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - queue->limits.discard_alignment = 0; queue->limits.discard_granularity = size; /* If discard is already enabled, don't reset queue limits */ - if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue)) + if (queue->limits.max_discard_sectors) return; + if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX)) + ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl); + blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors); blk_queue_max_discard_segments(queue, ctrl->max_discard_segments); @@ -1771,7 +1800,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); - blk_queue_dma_alignment(q, 7); + blk_queue_dma_alignment(q, 3); blk_queue_write_cache(q, vwc, vwc); } @@ -2100,10 +2129,9 @@ static const struct block_device_operations nvme_bdev_ops = { .pr_ops = &nvme_pr_ops, }; -static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) +static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled) { - unsigned long timeout = - ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; + unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies; u32 csts, bit = enabled ? NVME_CSTS_RDY : 0; int ret; @@ -2116,7 +2144,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) usleep_range(1000, 2000); if (fatal_signal_pending(current)) return -EINTR; - if (time_after(jiffies, timeout)) { + if (time_after(jiffies, timeout_jiffies)) { dev_err(ctrl->device, "Device not ready; aborting %s, CSTS=0x%x\n", enabled ? "initialisation" : "reset", csts); @@ -2147,13 +2175,14 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl) if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) msleep(NVME_QUIRK_DELAY_AMOUNT); - return nvme_wait_ready(ctrl, ctrl->cap, false); + return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false); } EXPORT_SYMBOL_GPL(nvme_disable_ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl) { unsigned dev_page_min; + u32 timeout; int ret; ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap); @@ -2174,6 +2203,27 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) ctrl->ctrl_config = NVME_CC_CSS_CSI; else ctrl->ctrl_config = NVME_CC_CSS_NVM; + + if (ctrl->cap & NVME_CAP_CRMS_CRWMS) { + u32 crto; + + ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto); + if (ret) { + dev_err(ctrl->device, "Reading CRTO failed (%d)\n", + ret); + return ret; + } + + if (ctrl->cap & NVME_CAP_CRMS_CRIMS) { + ctrl->ctrl_config |= NVME_CC_CRIME; + timeout = NVME_CRTO_CRIMT(crto); + } else { + timeout = NVME_CRTO_CRWMT(crto); + } + } else { + timeout = NVME_CAP_TIMEOUT(ctrl->cap); + } + ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT; ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; @@ -2182,7 +2232,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); if (ret) return ret; - return nvme_wait_ready(ctrl, ctrl->cap, true); + return nvme_wait_ready(ctrl, timeout, true); } EXPORT_SYMBOL_GPL(nvme_enable_ctrl); @@ -2894,8 +2944,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) if (id->dmrl) ctrl->max_discard_segments = id->dmrl; - if (id->dmrsl) - ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl); + ctrl->dmrsl = le32_to_cpu(id->dmrsl); if (id->wzsl) ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl); @@ -3080,10 +3129,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) if (ret) return ret; - ret = nvme_init_non_mdts_limits(ctrl); - if (ret < 0) - return ret; - ret = nvme_configure_apst(ctrl); if (ret < 0) return ret; @@ -3146,6 +3191,7 @@ static const struct file_operations nvme_dev_fops = { .release = nvme_dev_release, .unlocked_ioctl = nvme_dev_ioctl, .compat_ioctl = compat_ptr_ioctl, + .uring_cmd = nvme_dev_uring_cmd, }; static ssize_t nvme_sysfs_reset(struct device *dev, @@ -3699,6 +3745,7 @@ static const struct file_operations nvme_ns_chr_fops = { .release = nvme_ns_chr_release, .unlocked_ioctl = nvme_ns_chr_ioctl, .compat_ioctl = compat_ptr_ioctl, + .uring_cmd = nvme_ns_chr_uring_cmd, }; static int nvme_add_ns_cdev(struct nvme_ns *ns) @@ -4090,11 +4137,26 @@ out: static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns_ids ids = { }; + struct nvme_id_ns_cs_indep *id; struct nvme_ns *ns; + bool ready = true; if (nvme_identify_ns_descs(ctrl, nsid, &ids)) return; + /* + * Check if the namespace is ready. If not ignore it, we will get an + * AEN once it becomes ready and restart the scan. + */ + if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) && + !nvme_identify_ns_cs_indep(ctrl, nsid, &id)) { + ready = id->nstat & NVME_NSTAT_NRDY; + kfree(id); + } + + if (!ready) + return; + ns = nvme_find_get_ns(ctrl, nsid); if (ns) { nvme_validate_ns(ns, &ids); @@ -4237,11 +4299,26 @@ static void nvme_scan_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, scan_work); + int ret; /* No tagset on a live ctrl means IO queues could not created */ if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset) return; + /* + * Identify controller limits can change at controller reset due to + * new firmware download, even though it is not common we cannot ignore + * such scenario. Controller's non-mdts limits are reported in the unit + * of logical blocks that is dependent on the format of attached + * namespace. Hence re-read the limits at the time of ns allocation. + */ + ret = nvme_init_non_mdts_limits(ctrl); + if (ret < 0) { + dev_warn(ctrl->device, + "reading non-mdts-limits failed: %d\n", ret); + return; + } + if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) { dev_info(ctrl->device, "rescanning namespaces.\n"); nvme_clear_changed_ns_log(ctrl); @@ -4839,6 +4916,8 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct nvme_id_ns_cs_indep) != + NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 1e3a09cad961..46d6e194ac2b 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -187,6 +187,14 @@ static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl) return ctrl->subsys->subnqn; } +static inline void nvmf_complete_timed_out_request(struct request *rq) +{ + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { + nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; + blk_mq_complete_request(rq); + } +} + int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val); int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val); int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 080f85f4105f..7ae72c7a211b 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3831,6 +3831,9 @@ process_local_list: return count; } +static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); + +#ifdef CONFIG_BLK_CGROUP_FC_APPID /* Parse the cgroup id from a buf and return the length of cgrpid */ static int fc_parse_cgrpid(const char *buf, u64 *id) { @@ -3854,12 +3857,10 @@ static int fc_parse_cgrpid(const char *buf, u64 *id) } /* - * fc_update_appid: Parse and update the appid in the blkcg associated with - * cgroupid. - * @buf: buf contains both cgrpid and appid info - * @count: size of the buffer + * Parse and update the appid in the blkcg associated with the cgroupid. */ -static int fc_update_appid(const char *buf, size_t count) +static ssize_t fc_appid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { u64 cgrp_id; int appid_len = 0; @@ -3887,23 +3888,14 @@ static int fc_update_appid(const char *buf, size_t count) return ret; return count; } - -static ssize_t fc_appid_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - int ret = 0; - - ret = fc_update_appid(buf, count); - if (ret < 0) - return -EINVAL; - return count; -} -static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); static DEVICE_ATTR(appid_store, 0200, NULL, fc_appid_store); +#endif /* CONFIG_BLK_CGROUP_FC_APPID */ static struct attribute *nvme_fc_attrs[] = { &dev_attr_nvme_discovery.attr, +#ifdef CONFIG_BLK_CGROUP_FC_APPID &dev_attr_appid_store.attr, +#endif NULL }; diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 554566371ffa..096b1b47d750 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -5,6 +5,7 @@ */ #include <linux/ptrace.h> /* for force_successful_syscall_return */ #include <linux/nvme_ioctl.h> +#include <linux/io_uring.h> #include "nvme.h" /* @@ -53,10 +54,21 @@ out: return ERR_PTR(ret); } -static int nvme_submit_user_cmd(struct request_queue *q, +static int nvme_finish_user_metadata(struct request *req, void __user *ubuf, + void *meta, unsigned len, int ret) +{ + if (!ret && req_op(req) == REQ_OP_DRV_IN && + copy_to_user(ubuf, meta, len)) + ret = -EFAULT; + kfree(meta); + return ret; +} + +static struct request *nvme_alloc_user_request(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, - u32 meta_seed, u64 *result, unsigned timeout, bool vec) + u32 meta_seed, void **metap, unsigned timeout, bool vec, + unsigned int rq_flags, blk_mq_req_flags_t blk_flags) { bool write = nvme_is_write(cmd); struct nvme_ns *ns = q->queuedata; @@ -66,9 +78,9 @@ static int nvme_submit_user_cmd(struct request_queue *q, void *meta = NULL; int ret; - req = blk_mq_alloc_request(q, nvme_req_op(cmd), 0); + req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); if (IS_ERR(req)) - return PTR_ERR(req); + return req; nvme_init_request(req, cmd); if (timeout) @@ -105,26 +117,50 @@ static int nvme_submit_user_cmd(struct request_queue *q, goto out_unmap; } req->cmd_flags |= REQ_INTEGRITY; + *metap = meta; } } + return req; + +out_unmap: + if (bio) + blk_rq_unmap_user(bio); +out: + blk_mq_free_request(req); + return ERR_PTR(ret); +} + +static int nvme_submit_user_cmd(struct request_queue *q, + struct nvme_command *cmd, void __user *ubuffer, + unsigned bufflen, void __user *meta_buffer, unsigned meta_len, + u32 meta_seed, u64 *result, unsigned timeout, bool vec) +{ + struct request *req; + void *meta = NULL; + struct bio *bio; + int ret; + + req = nvme_alloc_user_request(q, cmd, ubuffer, bufflen, meta_buffer, + meta_len, meta_seed, &meta, timeout, vec, 0, 0); + if (IS_ERR(req)) + return PTR_ERR(req); + + bio = req->bio; + ret = nvme_execute_passthru_rq(req); + if (result) *result = le64_to_cpu(nvme_req(req)->result.u64); - if (meta && !ret && !write) { - if (copy_to_user(meta_buffer, meta, meta_len)) - ret = -EFAULT; - } - kfree(meta); - out_unmap: + if (meta) + ret = nvme_finish_user_metadata(req, meta_buffer, meta, + meta_len, ret); if (bio) blk_rq_unmap_user(bio); - out: blk_mq_free_request(req); return ret; } - static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) { struct nvme_user_io io; @@ -296,6 +332,139 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return status; } +struct nvme_uring_data { + __u64 metadata; + __u64 addr; + __u32 data_len; + __u32 metadata_len; + __u32 timeout_ms; +}; + +/* + * This overlays struct io_uring_cmd pdu. + * Expect build errors if this grows larger than that. + */ +struct nvme_uring_cmd_pdu { + union { + struct bio *bio; + struct request *req; + }; + void *meta; /* kernel-resident buffer */ + void __user *meta_buffer; + u32 meta_len; +}; + +static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( + struct io_uring_cmd *ioucmd) +{ + return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; +} + +static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd) +{ + struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); + struct request *req = pdu->req; + struct bio *bio = req->bio; + int status; + u64 result; + + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + status = -EINTR; + else + status = nvme_req(req)->status; + + result = le64_to_cpu(nvme_req(req)->result.u64); + + if (pdu->meta) + status = nvme_finish_user_metadata(req, pdu->meta_buffer, + pdu->meta, pdu->meta_len, status); + if (bio) + blk_rq_unmap_user(bio); + blk_mq_free_request(req); + + io_uring_cmd_done(ioucmd, status, result); +} + +static void nvme_uring_cmd_end_io(struct request *req, blk_status_t err) +{ + struct io_uring_cmd *ioucmd = req->end_io_data; + struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); + /* extract bio before reusing the same field for request */ + struct bio *bio = pdu->bio; + + pdu->req = req; + req->bio = bio; + /* this takes care of moving rest of completion-work to task context */ + io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb); +} + +static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) +{ + struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); + const struct nvme_uring_cmd *cmd = ioucmd->cmd; + struct request_queue *q = ns ? ns->queue : ctrl->admin_q; + struct nvme_uring_data d; + struct nvme_command c; + struct request *req; + unsigned int rq_flags = 0; + blk_mq_req_flags_t blk_flags = 0; + void *meta = NULL; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + c.common.opcode = READ_ONCE(cmd->opcode); + c.common.flags = READ_ONCE(cmd->flags); + if (c.common.flags) + return -EINVAL; + + c.common.command_id = 0; + c.common.nsid = cpu_to_le32(cmd->nsid); + if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) + return -EINVAL; + + c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); + c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); + c.common.metadata = 0; + c.common.dptr.prp1 = c.common.dptr.prp2 = 0; + c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); + c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); + c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); + c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); + c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); + c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); + + d.metadata = READ_ONCE(cmd->metadata); + d.addr = READ_ONCE(cmd->addr); + d.data_len = READ_ONCE(cmd->data_len); + d.metadata_len = READ_ONCE(cmd->metadata_len); + d.timeout_ms = READ_ONCE(cmd->timeout_ms); + + if (issue_flags & IO_URING_F_NONBLOCK) { + rq_flags = REQ_NOWAIT; + blk_flags = BLK_MQ_REQ_NOWAIT; + } + + req = nvme_alloc_user_request(q, &c, nvme_to_user_ptr(d.addr), + d.data_len, nvme_to_user_ptr(d.metadata), + d.metadata_len, 0, &meta, d.timeout_ms ? + msecs_to_jiffies(d.timeout_ms) : 0, vec, rq_flags, + blk_flags); + if (IS_ERR(req)) + return PTR_ERR(req); + req->end_io_data = ioucmd; + + /* to free bio on completion, as req->bio will be null at that time */ + pdu->bio = req->bio; + pdu->meta = meta; + pdu->meta_buffer = nvme_to_user_ptr(d.metadata); + pdu->meta_len = d.metadata_len; + + blk_execute_rq_nowait(req, 0, nvme_uring_cmd_end_io); + return -EIOCBQUEUED; +} + static bool is_ctrl_ioctl(unsigned int cmd) { if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) @@ -387,6 +556,53 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return __nvme_ioctl(ns, cmd, (void __user *)arg); } +static int nvme_uring_cmd_checks(unsigned int issue_flags) +{ + /* IOPOLL not supported yet */ + if (issue_flags & IO_URING_F_IOPOLL) + return -EOPNOTSUPP; + + /* NVMe passthrough requires big SQE/CQE support */ + if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != + (IO_URING_F_SQE128|IO_URING_F_CQE32)) + return -EOPNOTSUPP; + return 0; +} + +static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, + unsigned int issue_flags) +{ + struct nvme_ctrl *ctrl = ns->ctrl; + int ret; + + BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu)); + + ret = nvme_uring_cmd_checks(issue_flags); + if (ret) + return ret; + + switch (ioucmd->cmd_op) { + case NVME_URING_CMD_IO: + ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); + break; + case NVME_URING_CMD_IO_VEC: + ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); + break; + default: + ret = -ENOTTY; + } + + return ret; +} + +int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) +{ + struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, + struct nvme_ns, cdev); + + return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); +} + #ifdef CONFIG_NVME_MULTIPATH static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp, struct nvme_ns_head *head, int srcu_idx) @@ -453,8 +669,46 @@ out_unlock: srcu_read_unlock(&head->srcu, srcu_idx); return ret; } + +int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, + unsigned int issue_flags) +{ + struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; + struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); + int srcu_idx = srcu_read_lock(&head->srcu); + struct nvme_ns *ns = nvme_find_path(head); + int ret = -EINVAL; + + if (ns) + ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); + srcu_read_unlock(&head->srcu, srcu_idx); + return ret; +} #endif /* CONFIG_NVME_MULTIPATH */ +int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) +{ + struct nvme_ctrl *ctrl = ioucmd->file->private_data; + int ret; + + ret = nvme_uring_cmd_checks(issue_flags); + if (ret) + return ret; + + switch (ioucmd->cmd_op) { + case NVME_URING_CMD_ADMIN: + ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); + break; + case NVME_URING_CMD_ADMIN_VEC: + ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); + break; + default: + ret = -ENOTTY; + } + + return ret; +} + static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) { struct nvme_ns *ns; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index d464fdf978fb..d3e2440d8abb 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -437,6 +437,7 @@ static const struct file_operations nvme_ns_head_chr_fops = { .release = nvme_ns_head_chr_release, .unlocked_ioctl = nvme_ns_head_chr_ioctl, .compat_ioctl = compat_ptr_ioctl, + .uring_cmd = nvme_ns_head_chr_uring_cmd, }; static int nvme_add_ns_head_cdev(struct nvme_ns_head *head) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a2b53ca63335..9b72b6ecf33c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -284,6 +284,7 @@ struct nvme_ctrl { #endif u16 crdt[3]; u16 oncs; + u32 dmrsl; u16 oacs; u16 sqsize; u32 max_namespaces; @@ -782,7 +783,12 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg); long nvme_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, + unsigned int issue_flags); +int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, + unsigned int issue_flags); int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo); +int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); extern const struct attribute_group *nvme_ns_id_attr_groups[]; extern const struct pr_ops nvme_pr_ops; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3aacf1c0d5a5..5a98a7de0964 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1439,6 +1439,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) nvme_init_request(abort_req, &cmd); abort_req->end_io_data = NULL; + abort_req->rq_flags |= RQF_QUIET; blk_execute_rq_nowait(abort_req, false, abort_endio); /* @@ -1775,6 +1776,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset); if (IS_ERR(dev->ctrl.admin_q)) { blk_mq_free_tag_set(&dev->admin_tagset); + dev->ctrl.admin_q = NULL; return -ENOMEM; } if (!blk_get_queue(dev->ctrl.admin_q)) { @@ -2486,6 +2488,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) req->end_io_data = nvmeq; init_completion(&nvmeq->delete_done); + req->rq_flags |= RQF_QUIET; blk_execute_rq_nowait(req, false, opcode == nvme_admin_delete_cq ? nvme_del_cq_end : nvme_del_queue_end); return 0; @@ -2675,7 +2678,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) struct pci_dev *pdev = to_pci_dev(dev->dev); mutex_lock(&dev->shutdown_lock); - if (pci_is_enabled(pdev)) { + if (pci_device_is_present(pdev) && pci_is_enabled(pdev)) { u32 csts = readl(dev->bar + NVME_REG_CSTS); if (dev->ctrl.state == NVME_CTRL_LIVE || diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d9f19d901313..b87c8ae41d9b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2010,10 +2010,7 @@ static void nvme_rdma_complete_timed_out(struct request *rq) struct nvme_rdma_queue *queue = req->queue; nvme_rdma_stop_queue(queue); - if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { - nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; - blk_mq_complete_request(rq); - } + nvmf_complete_timed_out_request(rq); } static enum blk_eh_timer_return diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index ad3a2bf2f1e9..bb67538d241b 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2318,10 +2318,7 @@ static void nvme_tcp_complete_timed_out(struct request *rq) struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); - if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { - nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; - blk_mq_complete_request(rq); - } + nvmf_complete_timed_out_request(rq); } static enum blk_eh_timer_return diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index d886c2c59554..27a72504d31c 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -360,7 +360,7 @@ static u16 nvmet_bdev_discard_range(struct nvmet_req *req, ret = __blkdev_issue_discard(ns->bdev, nvmet_lba_to_sect(ns, range->slba), le32_to_cpu(range->nlb) << (ns->blksize_shift - 9), - GFP_KERNEL, 0, bio); + GFP_KERNEL, bio); if (ret && ret != -EOPNOTSUPP) { req->error_slba = le64_to_cpu(range->slba); return errno_to_nvme_status(req, ret); diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index e34718b09550..82b61acf7a72 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -34,8 +34,7 @@ static int validate_conv_zones_cb(struct blk_zone *z, bool nvmet_bdev_zns_enable(struct nvmet_ns *ns) { - struct request_queue *q = ns->bdev->bd_disk->queue; - u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q)); + u8 zasl = nvmet_zasl(bdev_max_zone_append_sectors(ns->bdev)); struct gendisk *bd_disk = ns->bdev->bd_disk; int ret; |