diff options
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- | drivers/nvme/host/apple.c | 2 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 67 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 1 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 39 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 17 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 70 |
6 files changed, 112 insertions, 84 deletions
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 4319ab50c10d..1de11b722f04 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1251,7 +1251,6 @@ static int apple_nvme_alloc_tagsets(struct apple_nvme *anv) anv->admin_tagset.timeout = NVME_ADMIN_TIMEOUT; anv->admin_tagset.numa_node = NUMA_NO_NODE; anv->admin_tagset.cmd_size = sizeof(struct apple_nvme_iod); - anv->admin_tagset.flags = BLK_MQ_F_NO_SCHED; anv->admin_tagset.driver_data = &anv->adminq; ret = blk_mq_alloc_tag_set(&anv->admin_tagset); @@ -1275,7 +1274,6 @@ static int apple_nvme_alloc_tagsets(struct apple_nvme *anv) anv->tagset.timeout = NVME_IO_TIMEOUT; anv->tagset.numa_node = NUMA_NO_NODE; anv->tagset.cmd_size = sizeof(struct apple_nvme_iod); - anv->tagset.flags = BLK_MQ_F_SHOULD_MERGE; anv->tagset.driver_data = &anv->ioq; ret = blk_mq_alloc_tag_set(&anv->tagset); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a970168a3014..76b615d4d5b9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -885,6 +885,12 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, return BLK_STS_OK; } +static void nvme_set_app_tag(struct request *req, struct nvme_command *cmnd) +{ + cmnd->rw.lbat = cpu_to_le16(bio_integrity(req->bio)->app_tag); + cmnd->rw.lbatm = cpu_to_le16(0xffff); +} + static void nvme_set_ref_tag(struct nvme_ns *ns, struct nvme_command *cmnd, struct request *req) { @@ -1017,18 +1023,17 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, control |= NVME_RW_PRINFO_PRACT; } - switch (ns->head->pi_type) { - case NVME_NS_DPS_PI_TYPE3: + if (bio_integrity_flagged(req->bio, BIP_CHECK_GUARD)) control |= NVME_RW_PRINFO_PRCHK_GUARD; - break; - case NVME_NS_DPS_PI_TYPE1: - case NVME_NS_DPS_PI_TYPE2: - control |= NVME_RW_PRINFO_PRCHK_GUARD | - NVME_RW_PRINFO_PRCHK_REF; + if (bio_integrity_flagged(req->bio, BIP_CHECK_REFTAG)) { + control |= NVME_RW_PRINFO_PRCHK_REF; if (op == nvme_cmd_zone_append) control |= NVME_RW_APPEND_PIREMAP; nvme_set_ref_tag(ns, cmnd, req); - break; + } + if (bio_integrity_flagged(req->bio, BIP_CHECK_APPTAG)) { + control |= NVME_RW_PRINFO_PRCHK_APP; + nvme_set_app_tag(req, cmnd); } } @@ -2002,6 +2007,7 @@ static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns, lim->atomic_write_hw_boundary = boundary; lim->atomic_write_hw_unit_min = bs; lim->atomic_write_hw_unit_max = rounddown_pow_of_two(atomic_bs); + lim->features |= BLK_FEAT_ATOMIC_WRITES; } static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl) @@ -2128,9 +2134,10 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns, struct queue_limits lim; int ret; - blk_mq_freeze_queue(ns->disk->queue); lim = queue_limits_start_update(ns->disk->queue); nvme_set_ctrl_limits(ns->ctrl, &lim); + + blk_mq_freeze_queue(ns->disk->queue); ret = queue_limits_commit_update(ns->disk->queue, &lim); set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); blk_mq_unfreeze_queue(ns->disk->queue); @@ -2177,12 +2184,12 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, goto out; } + lim = queue_limits_start_update(ns->disk->queue); + blk_mq_freeze_queue(ns->disk->queue); ns->head->lba_shift = id->lbaf[lbaf].ds; ns->head->nuse = le64_to_cpu(id->nuse); capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze)); - - lim = queue_limits_start_update(ns->disk->queue); nvme_set_ctrl_limits(ns->ctrl, &lim); nvme_configure_metadata(ns->ctrl, ns->head, id, nvm, info); nvme_set_chunk_sectors(ns, id, &lim); @@ -2285,6 +2292,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) struct queue_limits *ns_lim = &ns->disk->queue->limits; struct queue_limits lim; + lim = queue_limits_start_update(ns->head->disk->queue); blk_mq_freeze_queue(ns->head->disk->queue); /* * queue_limits mixes values that are the hardware limitations @@ -2301,7 +2309,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) * the splitting limits in to make sure we still obey possibly * lower limitations of other controllers. */ - lim = queue_limits_start_update(ns->head->disk->queue); lim.logical_block_size = ns_lim->logical_block_size; lim.physical_block_size = ns_lim->physical_block_size; lim.io_min = ns_lim->io_min; @@ -3092,7 +3099,7 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, struct nvme_effects_log **log) { - struct nvme_effects_log *cel = xa_load(&ctrl->cels, csi); + struct nvme_effects_log *old, *cel = xa_load(&ctrl->cels, csi); int ret; if (cel) @@ -3109,7 +3116,11 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, return ret; } - xa_store(&ctrl->cels, csi, cel, GFP_KERNEL); + old = xa_store(&ctrl->cels, csi, cel, GFP_KERNEL); + if (xa_is_err(old)) { + kfree(cel); + return xa_err(old); + } out: *log = cel; return 0; @@ -3171,6 +3182,25 @@ free_data: return ret; } +static int nvme_init_effects_log(struct nvme_ctrl *ctrl, + u8 csi, struct nvme_effects_log **log) +{ + struct nvme_effects_log *effects, *old; + + effects = kzalloc(sizeof(*effects), GFP_KERNEL); + if (!effects) + return -ENOMEM; + + old = xa_store(&ctrl->cels, csi, effects, GFP_KERNEL); + if (xa_is_err(old)) { + kfree(effects); + return xa_err(old); + } + + *log = effects; + return 0; +} + static void nvme_init_known_nvm_effects(struct nvme_ctrl *ctrl) { struct nvme_effects_log *log = ctrl->effects; @@ -3217,10 +3247,9 @@ static int nvme_init_effects(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) } if (!ctrl->effects) { - ctrl->effects = kzalloc(sizeof(*ctrl->effects), GFP_KERNEL); - if (!ctrl->effects) - return -ENOMEM; - xa_store(&ctrl->cels, NVME_CSI_NVM, ctrl->effects, GFP_KERNEL); + ret = nvme_init_effects_log(ctrl, NVME_CSI_NVM, &ctrl->effects); + if (ret < 0) + return ret; } nvme_init_known_nvm_effects(ctrl); @@ -4564,7 +4593,6 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, /* Reserved for fabric connect and keep alive */ set->reserved_tags = 2; set->numa_node = ctrl->numa_node; - set->flags = BLK_MQ_F_NO_SCHED; if (ctrl->ops->flags & NVME_F_BLOCKING) set->flags |= BLK_MQ_F_BLOCKING; set->cmd_size = cmd_size; @@ -4639,7 +4667,6 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, /* Reserved for fabric connect */ set->reserved_tags = 1; set->numa_node = ctrl->numa_node; - set->flags = BLK_MQ_F_SHOULD_MERGE; if (ctrl->ops->flags & NVME_F_BLOCKING) set->flags |= BLK_MQ_F_BLOCKING; set->cmd_size = cmd_size; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b81af7919e94..094be164ffdc 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -16,7 +16,6 @@ #include <linux/nvme-fc.h> #include "fc.h" #include <scsi/scsi_transport_fc.h> -#include <linux/blk-mq-pci.h> /* *************************** Data Structures/Defines ****************** */ diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index c4bb8dfe1a45..7be92d07430e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1187,43 +1187,4 @@ static inline bool nvme_multi_css(struct nvme_ctrl *ctrl) return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI; } -#ifdef CONFIG_NVME_VERBOSE_ERRORS -const char *nvme_get_error_status_str(u16 status); -const char *nvme_get_opcode_str(u8 opcode); -const char *nvme_get_admin_opcode_str(u8 opcode); -const char *nvme_get_fabrics_opcode_str(u8 opcode); -#else /* CONFIG_NVME_VERBOSE_ERRORS */ -static inline const char *nvme_get_error_status_str(u16 status) -{ - return "I/O Error"; -} -static inline const char *nvme_get_opcode_str(u8 opcode) -{ - return "I/O Cmd"; -} -static inline const char *nvme_get_admin_opcode_str(u8 opcode) -{ - return "Admin Cmd"; -} - -static inline const char *nvme_get_fabrics_opcode_str(u8 opcode) -{ - return "Fabrics Cmd"; -} -#endif /* CONFIG_NVME_VERBOSE_ERRORS */ - -static inline const char *nvme_opcode_str(int qid, u8 opcode) -{ - return qid ? nvme_get_opcode_str(opcode) : - nvme_get_admin_opcode_str(opcode); -} - -static inline const char *nvme_fabrics_opcode_str( - int qid, const struct nvme_command *cmd) -{ - if (nvme_is_fabrics(cmd)) - return nvme_get_fabrics_opcode_str(cmd->fabrics.fctype); - - return nvme_opcode_str(qid, cmd->common.opcode); -} #endif /* _NVME_H */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e2634f437f33..278bed4e35bb 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -8,7 +8,6 @@ #include <linux/async.h> #include <linux/blkdev.h> #include <linux/blk-mq.h> -#include <linux/blk-mq-pci.h> #include <linux/blk-integrity.h> #include <linux/dmi.h> #include <linux/init.h> @@ -373,7 +372,7 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db, /* * Ensure that the doorbell is updated before reading the event * index from memory. The controller needs to provide similar - * ordering to ensure the envent index is updated before reading + * ordering to ensure the event index is updated before reading * the doorbell. */ mb(); @@ -463,7 +462,7 @@ static void nvme_pci_map_queues(struct blk_mq_tag_set *set) */ map->queue_offset = qoff; if (i != HCTX_TYPE_POLL && offset) - blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset); + blk_mq_map_hw_queues(map, dev->dev, offset); else blk_mq_map_queues(map); qoff += map->nr_queues; @@ -1148,13 +1147,13 @@ static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) } } -static inline int nvme_poll_cq(struct nvme_queue *nvmeq, - struct io_comp_batch *iob) +static inline bool nvme_poll_cq(struct nvme_queue *nvmeq, + struct io_comp_batch *iob) { - int found = 0; + bool found = false; while (nvme_cqe_pending(nvmeq)) { - found++; + found = true; /* * load-load control dependency between phase and the rest of * the cqe requires a full read memory barrier @@ -2086,8 +2085,8 @@ static int nvme_alloc_host_mem_single(struct nvme_dev *dev, u64 size) sizeof(*dev->host_mem_descs), &dev->host_mem_descs_dma, GFP_KERNEL); if (!dev->host_mem_descs) { - dma_free_noncontiguous(dev->dev, dev->host_mem_size, - dev->hmb_sgt, DMA_BIDIRECTIONAL); + dma_free_noncontiguous(dev->dev, size, dev->hmb_sgt, + DMA_BIDIRECTIONAL); dev->hmb_sgt = NULL; return -ENOMEM; } diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index b127d41dbbfe..841238f38fdd 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -54,6 +54,8 @@ MODULE_PARM_DESC(tls_handshake_timeout, "nvme TLS handshake timeout in seconds (default 10)"); #endif +static atomic_t nvme_tcp_cpu_queues[NR_CPUS]; + #ifdef CONFIG_DEBUG_LOCK_ALLOC /* lockdep can detect a circular dependency of the form * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock @@ -127,6 +129,7 @@ enum nvme_tcp_queue_flags { NVME_TCP_Q_ALLOCATED = 0, NVME_TCP_Q_LIVE = 1, NVME_TCP_Q_POLLING = 2, + NVME_TCP_Q_IO_CPU_SET = 3, }; enum nvme_tcp_recv_state { @@ -1562,23 +1565,56 @@ static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue) ctrl->io_queues[HCTX_TYPE_POLL]; } +/** + * Track the number of queues assigned to each cpu using a global per-cpu + * counter and select the least used cpu from the mq_map. Our goal is to spread + * different controllers I/O threads across different cpu cores. + * + * Note that the accounting is not 100% perfect, but we don't need to be, we're + * simply putting our best effort to select the best candidate cpu core that we + * find at any given point. + */ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) { struct nvme_tcp_ctrl *ctrl = queue->ctrl; - int qid = nvme_tcp_queue_id(queue); - int n = 0; + struct blk_mq_tag_set *set = &ctrl->tag_set; + int qid = nvme_tcp_queue_id(queue) - 1; + unsigned int *mq_map = NULL; + int cpu, min_queues = INT_MAX, io_cpu; + + if (wq_unbound) + goto out; if (nvme_tcp_default_queue(queue)) - n = qid - 1; + mq_map = set->map[HCTX_TYPE_DEFAULT].mq_map; else if (nvme_tcp_read_queue(queue)) - n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1; + mq_map = set->map[HCTX_TYPE_READ].mq_map; else if (nvme_tcp_poll_queue(queue)) - n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - - ctrl->io_queues[HCTX_TYPE_READ] - 1; - if (wq_unbound) - queue->io_cpu = WORK_CPU_UNBOUND; - else - queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); + mq_map = set->map[HCTX_TYPE_POLL].mq_map; + + if (WARN_ON(!mq_map)) + goto out; + + /* Search for the least used cpu from the mq_map */ + io_cpu = WORK_CPU_UNBOUND; + for_each_online_cpu(cpu) { + int num_queues = atomic_read(&nvme_tcp_cpu_queues[cpu]); + + if (mq_map[cpu] != qid) + continue; + if (num_queues < min_queues) { + io_cpu = cpu; + min_queues = num_queues; + } + } + if (io_cpu != WORK_CPU_UNBOUND) { + queue->io_cpu = io_cpu; + atomic_inc(&nvme_tcp_cpu_queues[io_cpu]); + set_bit(NVME_TCP_Q_IO_CPU_SET, &queue->flags); + } +out: + dev_dbg(ctrl->ctrl.device, "queue %d: using cpu %d\n", + qid, queue->io_cpu); } static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid) @@ -1722,7 +1758,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, queue->sock->sk->sk_allocation = GFP_ATOMIC; queue->sock->sk->sk_use_task_frag = false; - nvme_tcp_set_queue_io_cpu(queue); + queue->io_cpu = WORK_CPU_UNBOUND; queue->request = NULL; queue->data_remaining = 0; queue->ddgst_remaining = 0; @@ -1844,6 +1880,9 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) return; + if (test_and_clear_bit(NVME_TCP_Q_IO_CPU_SET, &queue->flags)) + atomic_dec(&nvme_tcp_cpu_queues[queue->io_cpu]); + mutex_lock(&queue->queue_lock); if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags)) __nvme_tcp_stop_queue(queue); @@ -1878,9 +1917,10 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) nvme_tcp_init_recv_ctx(queue); nvme_tcp_setup_sock_ops(queue); - if (idx) + if (idx) { + nvme_tcp_set_queue_io_cpu(queue); ret = nvmf_connect_io_queue(nctrl, idx); - else + } else ret = nvmf_connect_admin_queue(nctrl); if (!ret) { @@ -2845,6 +2885,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = { static int __init nvme_tcp_init_module(void) { unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS; + int cpu; BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8); BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72); @@ -2862,6 +2903,9 @@ static int __init nvme_tcp_init_module(void) if (!nvme_tcp_wq) return -ENOMEM; + for_each_possible_cpu(cpu) + atomic_set(&nvme_tcp_cpu_queues[cpu], 0); + nvmf_register_transport(&nvme_tcp_transport); return 0; } |