From 8ae4e4477d8f5cc7736816a0492f82934ca32ab7 Mon Sep 17 00:00:00 2001 From: Marc Olson Date: Wed, 6 Sep 2017 17:23:56 -0700 Subject: nvme: update timeout module parameter type The underlying blk_mq_tag_set and request timeout parameters support an unsigned int. Extend the size of the nvme module parameters for io and admin commands to match. Signed-off-by: Marc Olson Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 8 ++++---- drivers/nvme/host/nvme.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index bb2aad078637..26c8913435b2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -34,13 +34,13 @@ #define NVME_MINORS (1U << MINORBITS) -unsigned char admin_timeout = 60; -module_param(admin_timeout, byte, 0644); +unsigned int admin_timeout = 60; +module_param(admin_timeout, uint, 0644); MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands"); EXPORT_SYMBOL_GPL(admin_timeout); -unsigned char nvme_io_timeout = 30; -module_param_named(io_timeout, nvme_io_timeout, byte, 0644); +unsigned int nvme_io_timeout = 30; +module_param_named(io_timeout, nvme_io_timeout, uint, 0644); MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); EXPORT_SYMBOL_GPL(nvme_io_timeout); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index d3f3c4447515..df787f39f4c1 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -21,10 +21,10 @@ #include #include -extern unsigned char nvme_io_timeout; +extern unsigned int nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) -extern unsigned char admin_timeout; +extern unsigned int admin_timeout; #define ADMIN_TIMEOUT (admin_timeout * HZ) #define NVME_DEFAULT_KATO 5 -- cgit From 786456325b9f05843ec722269a3ccd77c31b9c6d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 22 Sep 2017 18:04:43 -0700 Subject: nvme: use menu Kconfig interface Add a menu interface for NVME host and target support so that it is presented to users more like other Kconfig symbols. This makes the Device Driver menu less cluttered (easier to read) and keeps all of these symbols grouped together. Signed-off-by: Randy Dunlap Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/Kconfig b/drivers/nvme/Kconfig index b7c78a5b1f7a..04008e0bbe81 100644 --- a/drivers/nvme/Kconfig +++ b/drivers/nvme/Kconfig @@ -1,2 +1,6 @@ +menu "NVME Support" + source "drivers/nvme/host/Kconfig" source "drivers/nvme/target/Kconfig" + +endmenu -- cgit From 3375e29c280894f8d5e65c6920630a42db069599 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 11 Sep 2017 16:14:50 -0700 Subject: nvmet: bump NVMET_NR_QUEUES to 128 Raise the max number of IO queues to 128. There are several hosts with more than 64 cpus/threads.
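For context, the ceiling matters when a host negotiates queue counts via Set Features (Number of Queues): the target answers with 0's-based counts capped by its maximum. The helper below is a hedged sketch of that clamping, with an invented name, and is not code from this patch:

    /* illustrative only: report (count - 1) 0's-based submission and
     * completion queue counts, capped by the target's maximum */
    static u32 example_num_queues_result(u16 max_io_queues)
    {
        u16 nr = max_io_queues - 1;     /* 0's based */

        return nr | ((u32)nr << 16);    /* NSQR | NCQR << 16 */
    }

With the ceiling at 128, a host with, say, 96 threads can be offered one I/O queue per thread instead of topping out at 64.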
Signed-off-by: James Smart Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/nvmet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 7b8e20adf760..e342f02845c1 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -314,7 +314,7 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, u32 nvmet_get_log_page_len(struct nvme_command *cmd); #define NVMET_QUEUE_SIZE 1024 -#define NVMET_NR_QUEUES 64 +#define NVMET_NR_QUEUES 128 #define NVMET_MAX_CMD NVMET_QUEUE_SIZE #define NVMET_KAS 10 #define NVMET_DISC_KATO 120 -- cgit From d1f1071f8151355ec149117238e9c5aa6404ca9f Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 24 Sep 2017 16:15:55 +0300 Subject: nvme-fabrics: request transport module Help userspace to make sure transport module is loaded. Signed-off-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 555c976cc2ee..8bca36a46924 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -841,6 +841,9 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) if (ret) goto out_free_opts; + + request_module("nvme-%s", opts->transport); + /* * Check the generic options first as we need a valid transport for * the lookup below. Then clear the generic flags so that transport -- cgit From eaefd5abf6b095bfc55eb745bdf7c42cf66790eb Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 14 Sep 2017 10:38:42 -0700 Subject: nvme-fc: add uevent for auto-connect To support auto-connecting to FC-NVME devices upon their dynamic appearance, add a uevent that can kick off connection scripts. uevent is posted against the fc_udev device. patch set tested with the following rule to kick an nvme-cli connect-all for the FC initiator and FC target ports. This is just an example for testing and not intended for real life use. ACTION=="change", SUBSYSTEM=="fc", ENV{FC_EVENT}=="nvmediscovery", \ ENV{NVMEFC_HOST_TRADDR}=="*", ENV{NVMEFC_TRADDR}=="*", \ RUN+="/bin/sh -c '/usr/local/sbin/nvme connect-all --transport=fc --host-traddr=$env{NVMEFC_HOST_TRADDR} --traddr=$env{NVMEFC_TRADDR} >> /tmp/nvme_fc.log'" I will post proposed udev/systemd scripts for possible kernel support. 
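For quick testing without a udev rule, one could also watch the kobject uevent netlink socket directly. The sketch below is a minimal userspace listener (an illustration only, not part of the patch set) that matches the FC_EVENT property these uevents carry:

    #include <linux/netlink.h>
    #include <sys/socket.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        struct sockaddr_nl addr = {
            .nl_family = AF_NETLINK,
            .nl_groups = 1,             /* kernel uevent multicast group */
        };
        char buf[4096];
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_KOBJECT_UEVENT);

        if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
            return 1;

        for (;;) {
            ssize_t len = recv(fd, buf, sizeof(buf) - 1, 0);
            char *p;

            if (len <= 0)
                continue;
            buf[len] = '\0';
            /* properties are NUL-separated key=value strings */
            for (p = buf; p < buf + len; p += strlen(p) + 1)
                if (!strcmp(p, "FC_EVENT=nvmediscovery"))
                    printf("FC-NVME discovery event\n");
        }
    }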
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index af075e998944..f546eecb1f82 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -452,6 +452,36 @@ nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr) } EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport); +/* + * TRADDR strings, per FC-NVME are fixed format: + * "nn-0x<16hexdigits>:pn-0x<16hexdigits>" - 43 characters + * udev event will only differ by prefix of what field is + * being specified: + * "NVMEFC_HOST_TRADDR=" or "NVMEFC_TRADDR=" - 19 max characters + * 19 + 43 + null_fudge = 64 characters + */ +#define FCNVME_TRADDR_LENGTH 64 + +static void +nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport, + struct nvme_fc_rport *rport) +{ + char hostaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_HOST_TRADDR=...*/ + char tgtaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_TRADDR=...*/ + char *envp[4] = { "FC_EVENT=nvmediscovery", hostaddr, tgtaddr, NULL }; + + if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY)) + return; + + snprintf(hostaddr, sizeof(hostaddr), + "NVMEFC_HOST_TRADDR=nn-0x%016llx:pn-0x%016llx", + lport->localport.node_name, lport->localport.port_name); + snprintf(tgtaddr, sizeof(tgtaddr), + "NVMEFC_TRADDR=nn-0x%016llx:pn-0x%016llx", + rport->remoteport.node_name, rport->remoteport.port_name); + kobject_uevent_env(&fc_udev_device->kobj, KOBJ_CHANGE, envp); +} + /** * nvme_fc_register_remoteport - transport entry point called by an * LLDD to register the existence of a NVME @@ -516,6 +546,8 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, list_add_tail(&newrec->endp_list, &lport->endp_list); spin_unlock_irqrestore(&nvme_fc_lock, flags); + nvme_fc_signal_discovery_scan(lport, newrec); + *portptr = &newrec->remoteport; return 0; @@ -634,6 +666,23 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) } EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport); +/** + * nvme_fc_rescan_remoteport - transport entry point called by an + * LLDD to request a nvme device rescan. + * @remoteport: pointer to the (registered) remote port that is to be + * rescanned. + * + * Returns: N/A + */ +void +nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport) +{ + struct nvme_fc_rport *rport = remoteport_to_rport(remoteport); + + nvme_fc_signal_discovery_scan(rport->lport, rport); +} +EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport); + /* *********************** FC-NVME DMA Handling **************************** */ -- cgit From 5f5685569ae8fccb0344373d823f2e4c59bb3d8e Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 14 Sep 2017 10:38:41 -0700 Subject: nvme-fc: create fc class and transport device Added a new fc class and a device node for udev events under it. I expect the fc class will eventually be the location where the FC SCSI and FC NVME merge in the future. Therefore names are kept somewhat generic. 
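As a sketch of the intent, a later FC subsystem could hang its own udev device off the same class. The snippet below is purely hypothetical (the function and device names are invented) and only illustrates the class-sharing idea:

    /* hypothetical future user of the shared "fc" class */
    static struct device *scsi_fc_udev_device;

    static int example_join_fc_class(struct class *fc_class)
    {
        scsi_fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 1),
                                            NULL, "fc_scsi_udev_device");
        return PTR_ERR_OR_ZERO(scsi_fc_udev_device);
    }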
Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index f546eecb1f82..2dc9932e4818 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -213,6 +213,13 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt); +/* + * These items are short-term. They will eventually be moved into + * a generic FC class. See comments in module init. + */ +static struct class *fc_class; +static struct device *fc_udev_device; + /* *********************** FC-NVME Port Management ************************ */ @@ -3046,7 +3053,50 @@ static struct nvmf_transport_ops nvme_fc_transport = { static int __init nvme_fc_init_module(void) { - return nvmf_register_transport(&nvme_fc_transport); + int ret; + + /* + * NOTE: + * It is expected that in the future the kernel will combine + * the FC-isms that are currently under scsi and now being + * added to by NVME into a new standalone FC class. The SCSI + * and NVME protocols and their devices would be under this + * new FC class. + * + * As we need something to post FC-specific udev events to, + * specifically for nvme probe events, start by creating the + * new device class. When the new standalone FC class is + * put in place, this code will move to a more generic + * location for the class. + */ + fc_class = class_create(THIS_MODULE, "fc"); + if (IS_ERR(fc_class)) { + pr_err("couldn't register class fc\n"); + return PTR_ERR(fc_class); + } + + /* + * Create a device for the FC-centric udev events + */ + fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL, + "fc_udev_device"); + if (IS_ERR(fc_udev_device)) { + pr_err("couldn't create fc_udev device!\n"); + ret = PTR_ERR(fc_udev_device); + goto out_destroy_class; + } + + ret = nvmf_register_transport(&nvme_fc_transport); + if (ret) + goto out_destroy_device; + + return 0; + +out_destroy_device: + device_destroy(fc_class, MKDEV(0, 0)); +out_destroy_class: + class_destroy(fc_class); + return ret; } static void __exit nvme_fc_exit_module(void) @@ -3059,6 +3109,9 @@ static void __exit nvme_fc_exit_module(void) ida_destroy(&nvme_fc_local_port_cnt); ida_destroy(&nvme_fc_ctrl_cnt); + + device_destroy(fc_class, MKDEV(0, 0)); + class_destroy(fc_class); } module_init(nvme_fc_init_module); -- cgit From 469d0ef06debe29ec60350b9976a520c4ad5a1e8 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 26 Sep 2017 21:50:45 -0700 Subject: nvme-fc: move remote port get/put/free location move nvme_fc_rport_get/put and rport free to higher in the file to avoid adding prototypes to resolve references in upcoming code additions Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 78 +++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 39 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 2dc9932e4818..b8e0822127c1 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -489,6 +489,45 @@ nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport, kobject_uevent_env(&fc_udev_device->kobj, KOBJ_CHANGE, envp); } +static void +nvme_fc_free_rport(struct kref *ref) +{ + struct nvme_fc_rport *rport = + container_of(ref, struct nvme_fc_rport, ref); + struct nvme_fc_lport *lport = + localport_to_lport(rport->remoteport.localport); + unsigned long flags; + + 
WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED); + WARN_ON(!list_empty(&rport->ctrl_list)); + + /* remove from lport list */ + spin_lock_irqsave(&nvme_fc_lock, flags); + list_del(&rport->endp_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + /* let the LLDD know we've finished tearing it down */ + lport->ops->remoteport_delete(&rport->remoteport); + + ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); + + kfree(rport); + + nvme_fc_lport_put(lport); +} + +static void +nvme_fc_rport_put(struct nvme_fc_rport *rport) +{ + kref_put(&rport->ref, nvme_fc_free_rport); +} + +static int +nvme_fc_rport_get(struct nvme_fc_rport *rport) +{ + return kref_get_unless_zero(&rport->ref); +} + /** * nvme_fc_register_remoteport - transport entry point called by an * LLDD to register the existence of a NVME @@ -568,45 +607,6 @@ out_reghost_failed: } EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); -static void -nvme_fc_free_rport(struct kref *ref) -{ - struct nvme_fc_rport *rport = - container_of(ref, struct nvme_fc_rport, ref); - struct nvme_fc_lport *lport = - localport_to_lport(rport->remoteport.localport); - unsigned long flags; - - WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED); - WARN_ON(!list_empty(&rport->ctrl_list)); - - /* remove from lport list */ - spin_lock_irqsave(&nvme_fc_lock, flags); - list_del(&rport->endp_list); - spin_unlock_irqrestore(&nvme_fc_lock, flags); - - /* let the LLDD know we've finished tearing it down */ - lport->ops->remoteport_delete(&rport->remoteport); - - ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); - - kfree(rport); - - nvme_fc_lport_put(lport); -} - -static void -nvme_fc_rport_put(struct nvme_fc_rport *rport) -{ - kref_put(&rport->ref, nvme_fc_free_rport); -} - -static int -nvme_fc_rport_get(struct nvme_fc_rport *rport) -{ - return kref_get_unless_zero(&rport->ref); -} - static int nvme_fc_abort_lsops(struct nvme_fc_rport *rport) { -- cgit From 761f2e1ed8822cf12378c857de337290d0c82a81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 5 Oct 2017 18:46:46 +0200 Subject: nvme: simplify compat_ioctl handling We can just use our normal ioctl handler for the compat case and remove the boilerplate code for it. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy --- drivers/nvme/host/core.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 26c8913435b2..573cc3b59bfa 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1052,16 +1052,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, } } -#ifdef CONFIG_COMPAT -static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - return nvme_ioctl(bdev, mode, cmd, arg); -} -#else -#define nvme_compat_ioctl NULL -#endif - static int nvme_open(struct block_device *bdev, fmode_t mode) { return nvme_get_ns_from_disk(bdev->bd_disk) ? 
0 : -ENXIO; @@ -1380,7 +1370,7 @@ EXPORT_SYMBOL_GPL(nvme_sec_submit); static const struct block_device_operations nvme_fops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, - .compat_ioctl = nvme_compat_ioctl, + .compat_ioctl = nvme_ioctl, .open = nvme_open, .release = nvme_release, .getgeo = nvme_getgeo, -- cgit From 31b8446079757575e576b0516f0e4c0fcdfbd3dd Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Oct 2017 12:53:07 +0300 Subject: nvme: introduce nvme_reinit_tagset Move blk_mq_reinit_tagset from blk-mq to nvme core as the only user of it. Current transports that use it (rdma, fc) simply implement .reinit_request op. This patch does not change any functionality. Reviewed-by: Jens Axboe Reviewed-by: Max Gurtovoy Reviewed-by: Johannes Thumshirn Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 10 ++++++++++ drivers/nvme/host/fc.c | 3 ++- drivers/nvme/host/nvme.h | 2 ++ drivers/nvme/host/rdma.c | 7 +++---- 4 files changed, 17 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 573cc3b59bfa..c95155696741 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2934,6 +2934,16 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_start_queues); +int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set) +{ + if (!ctrl->ops->reinit_request) + return 0; + + return blk_mq_tagset_iter(set, set->driver_data, + ctrl->ops->reinit_request); +} +EXPORT_SYMBOL_GPL(nvme_reinit_tagset); + int __init nvme_core_init(void) { int result; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b8e0822127c1..372a0685b12a 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2393,7 +2393,7 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) nvme_fc_init_io_queues(ctrl); - ret = blk_mq_reinit_tagset(&ctrl->tag_set, nvme_fc_reinit_request); + ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); if (ret) goto out_free_io_queues; @@ -2753,6 +2753,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .submit_async_event = nvme_fc_submit_async_event, .delete_ctrl = nvme_fc_del_nvme_ctrl, .get_address = nvmf_get_address, + .reinit_request = nvme_fc_reinit_request, }; static void diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index df787f39f4c1..cb9d93048f3d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -237,6 +237,7 @@ struct nvme_ctrl_ops { void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx); int (*delete_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); + int (*reinit_request)(void *data, struct request *rq); }; static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl) @@ -311,6 +312,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl); void nvme_wait_freeze(struct nvme_ctrl *ctrl); void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); void nvme_start_freeze(struct nvme_ctrl *ctrl); +int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 92a03ff5fb4d..039f38cb6987 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -774,8 +774,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, goto out_free_tagset; } } else { - error = blk_mq_reinit_tagset(&ctrl->admin_tag_set, - 
nvme_rdma_reinit_request); + error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); if (error) goto out_free_queue; } @@ -855,8 +854,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) goto out_free_tag_set; } } else { - ret = blk_mq_reinit_tagset(&ctrl->tag_set, - nvme_rdma_reinit_request); + ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); if (ret) goto out_free_io_queues; @@ -1845,6 +1843,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .submit_async_event = nvme_rdma_submit_async_event, .delete_ctrl = nvme_rdma_del_ctrl, .get_address = nvmf_get_address, + .reinit_request = nvme_rdma_reinit_request, }; static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, -- cgit From 60070c78ef7b624680ffdde30bca55e515504585 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Oct 2017 15:29:06 +0300 Subject: nvme-rdma: pass tagset to directly nvme_rdma_free_tagset Instead of flagging admin/io. Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 039f38cb6987..93c8578a2ddc 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -670,11 +670,10 @@ out_free_queues: return ret; } -static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, bool admin) +static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, + struct blk_mq_tag_set *set) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - struct blk_mq_tag_set *set = admin ? - &ctrl->admin_tag_set : &ctrl->tag_set; blk_mq_free_tag_set(set); nvme_rdma_dev_put(ctrl->device); @@ -744,7 +743,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, nvme_rdma_stop_queue(&ctrl->queues[0]); if (remove) { blk_cleanup_queue(ctrl->ctrl.admin_q); - nvme_rdma_free_tagset(&ctrl->ctrl, true); + nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); } nvme_rdma_free_queue(&ctrl->queues[0]); } @@ -818,7 +817,7 @@ out_cleanup_queue: blk_cleanup_queue(ctrl->ctrl.admin_q); out_free_tagset: if (new) - nvme_rdma_free_tagset(&ctrl->ctrl, true); + nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); out_free_queue: nvme_rdma_free_queue(&ctrl->queues[0]); return error; @@ -830,7 +829,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, nvme_rdma_stop_io_queues(ctrl); if (remove) { blk_cleanup_queue(ctrl->ctrl.connect_q); - nvme_rdma_free_tagset(&ctrl->ctrl, false); + nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); } nvme_rdma_free_io_queues(ctrl); } @@ -873,7 +872,7 @@ out_cleanup_connect_q: blk_cleanup_queue(ctrl->ctrl.connect_q); out_free_tag_set: if (new) - nvme_rdma_free_tagset(&ctrl->ctrl, false); + nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); out_free_io_queues: nvme_rdma_free_io_queues(ctrl); return ret; -- cgit From d8bfceebc41762f7dde960996a24180f9a67cd51 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Oct 2017 15:29:07 +0300 Subject: nvme-rdma: fix wrong logging message Not necessarily address resolution failed. 
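For context, the wait being reported on spans the whole RDMA/CM sequence, so any stage may be the one that failed. A condensed, illustrative sketch (the CM event names are real, the flow is simplified):

    /* Illustrative only: the stages nvme_rdma_wait_for_cm() covers.
     * Each step completes asynchronously via a CM event, and any of
     * them can be the one that fails. */
    static int example_connect_stages(struct rdma_cm_id *cm_id,
            struct sockaddr *dst)
    {
        /* kicks off RDMA_CM_EVENT_ADDR_RESOLVED */
        int ret = rdma_resolve_addr(cm_id, NULL, dst, 3000 /* ms */);

        /* the CM handler then drives rdma_resolve_route() and
         * rdma_connect(), ending in RDMA_CM_EVENT_ESTABLISHED */
        return ret;
    }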
Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 93c8578a2ddc..02b3388af308 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -544,7 +544,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, ret = nvme_rdma_wait_for_cm(queue); if (ret) { dev_info(ctrl->ctrl.device, - "rdma_resolve_addr wait failed (%d).\n", ret); + "rdma connection establishment failed (%d)\n", ret); goto out_destroy_cm_id; } -- cgit From 0c5b43b9c1fafc5af37915dde6670aaf1224e8cc Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Oct 2017 15:29:08 +0300 Subject: nvme-rdma: move assignment to declaration No need for the extra line for trivial assignments. Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 02b3388af308..221ace23aaa5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -434,11 +434,9 @@ out_err: static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) { - struct nvme_rdma_device *dev; - struct ib_device *ibdev; + struct nvme_rdma_device *dev = queue->device; + struct ib_device *ibdev = dev->dev; - dev = queue->device; - ibdev = dev->dev; rdma_destroy_qp(queue->cm_id); ib_free_cq(queue->ib_cq); -- cgit From 0fc176dfdafc78e66bd74f54e487ca1fc59d01bf Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Oct 2017 15:29:09 +0300 Subject: nvme-rdma: Check that reinit_request got a proper mr Warn if req->mr is NULL as it should never happen. Reviewed-by: Johannes Thumshirn Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 221ace23aaa5..7e61e6447d2c 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -274,6 +274,9 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); int ret = 0; + if (WARN_ON_ONCE(!req->mr)) + return 0; + ib_dereg_mr(req->mr); req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG, -- cgit From 5e1fe61d4170b1f498e20de92c7ce4cd5e40c3c5 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Oct 2017 15:29:11 +0300 Subject: nvme-rdma: teardown admin/io queues once on error recovery Relying on the queue state while tearing down on every reconnect attempt is not a good design. We should do it once in err_work and simply try to establish the queues for each reconnect attempt. 
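A hedged sketch of the resulting split of responsibilities (the controller type and helper names are invented for illustration; the real functions appear in the diff below):

    /* illustrative only: teardown happens once, in error recovery */
    static void example_error_recovery(struct example_ctrl *ctrl)
    {
        example_destroy_io_queues(ctrl);
        example_destroy_admin_queue(ctrl);
        example_schedule_reconnect(ctrl);
    }

    /* ... so each reconnect attempt only (re)establishes queues */
    static void example_reconnect(struct example_ctrl *ctrl)
    {
        if (example_configure_admin_queue(ctrl))
            goto requeue;
        if (example_configure_io_queues(ctrl))
            goto destroy_admin;
        return;                 /* controller is live again */
    destroy_admin:
        example_destroy_admin_queue(ctrl);
    requeue:
        example_schedule_reconnect(ctrl);   /* try again later */
    }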
Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 7e61e6447d2c..95837c5317b4 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -925,10 +925,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) ++ctrl->ctrl.nr_reconnects; - if (ctrl->ctrl.queue_count > 1) - nvme_rdma_destroy_io_queues(ctrl, false); - - nvme_rdma_destroy_admin_queue(ctrl, false); ret = nvme_rdma_configure_admin_queue(ctrl, false); if (ret) goto requeue; @@ -936,7 +932,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) if (ctrl->ctrl.queue_count > 1) { ret = nvme_rdma_configure_io_queues(ctrl, false); if (ret) - goto requeue; + goto destroy_admin; } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -946,14 +942,17 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) return; } - ctrl->ctrl.nr_reconnects = 0; - nvme_start_ctrl(&ctrl->ctrl); - dev_info(ctrl->ctrl.device, "Successfully reconnected\n"); + dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n", + ctrl->ctrl.nr_reconnects); + + ctrl->ctrl.nr_reconnects = 0; return; +destroy_admin: + nvme_rdma_destroy_admin_queue(ctrl, false); requeue: dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", ctrl->ctrl.nr_reconnects); @@ -969,17 +968,15 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); - nvme_rdma_stop_io_queues(ctrl); - } - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); - nvme_rdma_stop_queue(&ctrl->queues[0]); - - /* We must take care of fastfail/requeue all our inflight requests */ - if (ctrl->ctrl.queue_count > 1) blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); + nvme_rdma_destroy_io_queues(ctrl, false); + } + + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); + nvme_rdma_destroy_admin_queue(ctrl, false); /* * queues are not a live anymore, so restart the queues to fail fast -- cgit From 60a518863368c7e124cd8411c2481d0938f53e08 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Oct 2017 15:29:10 +0300 Subject: nvme-rdma: Don't local invalidate if the queue is not live No chance for the local invalidate to succeed if the queue-pair is in error state. Most likely the target will do a remote invalidation of our mr so not a big loss on the test_bit. 
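For reference, a local invalidate is itself a work request posted to the queue-pair, which is why a QP in error state makes it pointless. A minimal sketch (simplified, completion handling omitted):

    /* illustrative only: invalidating an rkey posts IB_WR_LOCAL_INV,
     * which can only complete successfully on a live QP */
    static int example_inv_rkey(struct ib_qp *qp, u32 rkey)
    {
        struct ib_send_wr wr = {
            .opcode             = IB_WR_LOCAL_INV,
            .send_flags         = IB_SEND_SIGNALED,
            .ex.invalidate_rkey = rkey,
        }, *bad_wr;

        return ib_post_send(qp, &wr, &bad_wr);
    }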
Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 95837c5317b4..a58dd0c77a29 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1052,7 +1052,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, if (!blk_rq_bytes(rq)) return; - if (req->mr->need_inval) { + if (req->mr->need_inval && test_bit(NVME_RDMA_Q_LIVE, &req->queue->flags)) { res = nvme_rdma_inv_rkey(queue, req); if (unlikely(res < 0)) { dev_err(ctrl->ctrl.device, -- cgit From 5013e98b5e8db50144e8f1ca5a96aed95d4d48a0 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Oct 2017 15:29:12 +0300 Subject: nvme-rdma: change queue flag semantics DELETING -> ALLOCATED Instead of marking we are deleting, mark we are allocated and check that instead. This makes the logic symmetrical to connected mark check. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a58dd0c77a29..ff67375982fe 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -79,8 +79,8 @@ struct nvme_rdma_request { }; enum nvme_rdma_queue_flags { - NVME_RDMA_Q_LIVE = 0, - NVME_RDMA_Q_DELETING = 1, + NVME_RDMA_Q_ALLOCATED = 0, + NVME_RDMA_Q_LIVE = 1, }; struct nvme_rdma_queue { @@ -549,7 +549,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, goto out_destroy_cm_id; } - clear_bit(NVME_RDMA_Q_DELETING, &queue->flags); + set_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags); return 0; @@ -569,7 +569,7 @@ static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) { - if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) + if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) return; nvme_rdma_destroy_queue_ib(queue); -- cgit From 0ad0bfa29822904b9186a1c04dbfb3014bc5dc8a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Oct 2017 12:49:51 +0300 Subject: nvme-rdma: stop controller reset if the controller is deleting If the controller is deleting (in case the user decided to delete it), we have no point to continue reset sequence. Signed-off-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index ff67375982fe..34d22ff42afb 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1818,7 +1818,11 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); - WARN_ON_ONCE(!changed); + if (!changed) { + /* state change failure is ok if we're in DELETING state */ + WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING); + return; + } nvme_start_ctrl(&ctrl->ctrl); -- cgit From 16772ae6d9221b065607335a9b279375d8d3e2fd Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 18 Oct 2017 22:56:09 +0900 Subject: nvme-pci: fix typos in comments fixed comment typos in adapter_alloc_cq() and adapter_alloc_sq(). 'the the' duplications are replaced with 'that the'. 
Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cb73bc8cad3b..bafdc2ab5be3 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -930,7 +930,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; /* - * Note: we (ab)use the fact the the prp fields survive if no data + * Note: we (ab)use the fact that the prp fields survive if no data * is attached to the request. */ memset(&c, 0, sizeof(c)); @@ -951,7 +951,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, int flags = NVME_QUEUE_PHYS_CONTIG; /* - * Note: we (ab)use the fact the the prp fields survive if no data + * Note: we (ab)use the fact that the prp fields survive if no data * is attached to the request. */ memset(&c, 0, sizeof(c)); -- cgit From 94f29d4f7808e3a495fda8c5d337fc4ac4037ce7 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 18 Oct 2017 12:38:24 +0000 Subject: nvme-rdma: Add BLK_MQ_F_NO_SCHED flag to admin tag set Since commit b86dd81 "block: get rid of blk-mq default scheduler choice Kconfig entries", when setting nr_hw_queues to 1 the admin tag set uses mq-deadline scheduler. This flag is useful for admin queues that aren't used for normal IO. Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 34d22ff42afb..a5577e06a620 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -699,6 +699,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set->driver_data = ctrl; set->nr_hw_queues = 1; set->timeout = ADMIN_TIMEOUT; + set->flags = BLK_MQ_F_NO_SCHED; } else { set = &ctrl->tag_set; memset(set, 0, sizeof(*set)); -- cgit From 5a22e2bf44fb311e5a6bfec6ed5680199d6e162a Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 18 Oct 2017 12:38:25 +0000 Subject: nvme-fc: Add BLK_MQ_F_NO_SCHED flag to admin tag set Since commit b86dd81 "block: get rid of blk-mq default scheduler choice Kconfig entries", when setting nr_hw_queues to 1 the admin tag set uses mq-deadline scheduler. This flag is useful for admin queues that aren't used for normal IO. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 372a0685b12a..0d0e898912ad 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2853,6 +2853,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->admin_tag_set.driver_data = ctrl; ctrl->admin_tag_set.nr_hw_queues = 1; ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; + ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); if (ret) -- cgit From 86f36b9c6c6cda28c86ead09323f55018e5e64df Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 18 Oct 2017 12:38:26 +0000 Subject: nvme-loop: Add BLK_MQ_F_NO_SCHED flag to admin tag set This flag is useful for admin queues that aren't used for normal IO. 
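For context, a simplified illustration of the block-layer default this opts out of (condensed, not the actual elevator code):

    /* illustrative only: single-hw-queue tag sets get a default
     * scheduler (mq-deadline) unless the driver opts out */
    static bool example_wants_default_sched(struct blk_mq_tag_set *set)
    {
        return set->nr_hw_queues == 1 &&
            !(set->flags & BLK_MQ_F_NO_SCHED);
    }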
Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 92628c432926..c56354e1e4c6 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -365,6 +365,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) ctrl->admin_tag_set.driver_data = ctrl; ctrl->admin_tag_set.nr_hw_queues = 1; ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; + ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; ctrl->queues[0].ctrl = ctrl; error = nvmet_sq_init(&ctrl->queues[0].nvme_sq); -- cgit From ba2dec35e431e62e11116b8036dc372493169b39 Mon Sep 17 00:00:00 2001 From: Roy Shterman Date: Wed, 18 Oct 2017 13:46:07 +0300 Subject: nvmet: Change max_nsid in subsystem due to ns_disable if needed In case we disable a namespace whose nsid matches the subsystem's max_nsid, we need to search for the next largest nsid in this subsystem. If the subsystem has no more namespaces we set it to 0, else we take the nsid of the last namespace in the namespaces list, because the list is kept sorted on insertion. Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Roy Shterman Reviewed-by: Johannes Thumshirn [hch: slight refactor] Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 1b208beeef50..da088293eafc 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -57,6 +57,17 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len) return 0; } +static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys) +{ + struct nvmet_ns *ns; + + if (list_empty(&subsys->namespaces)) + return 0; + + ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link); + return ns->nsid; +} + static u32 nvmet_async_event_result(struct nvmet_async_event *aen) { return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16); } @@ -334,6 +345,8 @@ void nvmet_ns_disable(struct nvmet_ns *ns) ns->enabled = false; list_del_rcu(&ns->dev_link); + if (ns->nsid == subsys->max_nsid) + subsys->max_nsid = nvmet_max_nsid(subsys); mutex_unlock(&subsys->lock); /* -- cgit From 9843f685ae365d0c628e6a0d929772bca7274311 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Oct 2017 13:10:01 +0200 Subject: nvme: use ida_simple_{get,remove} for the controller instance Switch to the ida_simple_* helpers instead of opencoding them.
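Condensed for reference, the shape of the change (both halves are taken from the diff below):

    /* before: pre-load, then retry allocation under an external lock */
    do {
        if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
            return -ENODEV;
        spin_lock(&dev_list_lock);
        error = ida_get_new(&nvme_instance_ida, &instance);
        spin_unlock(&dev_list_lock);
    } while (error == -EAGAIN);

    /* after: one self-locking call returning the id or -errno */
    instance = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);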
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn --- drivers/nvme/host/core.c | 40 +++++++--------------------------------- 1 file changed, 7 insertions(+), 33 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c95155696741..7fae42d595d5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -74,6 +74,8 @@ EXPORT_SYMBOL_GPL(nvme_wq); static LIST_HEAD(nvme_ctrl_list); static DEFINE_SPINLOCK(dev_list_lock); +static DEFINE_IDA(nvme_instance_ida); + static struct class *nvme_class; static __le32 nvme_get_log_dw10(u8 lid, size_t size) @@ -2696,35 +2698,6 @@ void nvme_queue_async_events(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_queue_async_events); -static DEFINE_IDA(nvme_instance_ida); - -static int nvme_set_instance(struct nvme_ctrl *ctrl) -{ - int instance, error; - - do { - if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL)) - return -ENODEV; - - spin_lock(&dev_list_lock); - error = ida_get_new(&nvme_instance_ida, &instance); - spin_unlock(&dev_list_lock); - } while (error == -EAGAIN); - - if (error) - return -ENODEV; - - ctrl->instance = instance; - return 0; -} - -static void nvme_release_instance(struct nvme_ctrl *ctrl) -{ - spin_lock(&dev_list_lock); - ida_remove(&nvme_instance_ida, ctrl->instance); - spin_unlock(&dev_list_lock); -} - void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_stop_keep_alive(ctrl); @@ -2762,7 +2735,7 @@ static void nvme_free_ctrl(struct kref *kref) struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref); put_device(ctrl->device); - nvme_release_instance(ctrl); + ida_simple_remove(&nvme_instance_ida, ctrl->instance); ida_destroy(&ctrl->ns_ida); ctrl->ops->free_ctrl(ctrl); @@ -2796,9 +2769,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, INIT_WORK(&ctrl->async_event_work, nvme_async_event_work); INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work); - ret = nvme_set_instance(ctrl); - if (ret) + ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL); + if (ret < 0) goto out; + ctrl->instance = ret; ctrl->device = device_create_with_groups(nvme_class, ctrl->dev, MKDEV(nvme_char_major, ctrl->instance), @@ -2825,7 +2799,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, return 0; out_release_instance: - nvme_release_instance(ctrl); + ida_simple_remove(&nvme_instance_ida, ctrl->instance); out: return ret; } -- cgit From a7a7cbe353a52665b8463e1822ce6ba46b0609d6 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 16 Oct 2017 18:24:20 -0700 Subject: nvme-pci: add SGL support This adds SGL support for NVMe PCIe driver, based on an earlier patch from Rajiv Shanmugam Madeswaran . This patch refactors the original code and adds new module parameter sgl_threshold to determine whether to use SGL or PRP for IOs. The usage of SGLs is controlled by the sgl_threshold module parameter, which allows to conditionally use SGLs if average request segment size (avg_seg_size) is greater than sgl_threshold. In the original patch, the decision of using SGLs was dependent only on the IO size, with the new approach we consider not only IO size but also the number of physical segments present in the IO. We calculate avg_seg_size based on request payload bytes and number of physical segments present in the request. For e.g.:- 1. blk_rq_nr_phys_segments = 2 blk_rq_payload_bytes = 8k avg_seg_size = 4K use sgl if avg_seg_size >= sgl_threshold. 2. 
blk_rq_nr_phys_segments = 2 blk_rq_payload_bytes = 64k avg_seg_size = 32K use sgl if avg_seg_size >= sgl_threshold. 3. blk_rq_nr_phys_segments = 16 blk_rq_payload_bytes = 64k avg_seg_size = 4K use sgl if avg_seg_size >= sgl_threshold. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 214 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 187 insertions(+), 27 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index bafdc2ab5be3..11e6fd9d0ba4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -45,6 +45,8 @@ */ #define NVME_AQ_BLKMQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AERS) +#define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc)) + static int use_threaded_interrupts; module_param(use_threaded_interrupts, int, 0); @@ -57,6 +59,12 @@ module_param(max_host_mem_size_mb, uint, 0444); MODULE_PARM_DESC(max_host_mem_size_mb, "Maximum Host Memory Buffer (HMB) size per controller (in MiB)"); +static unsigned int sgl_threshold = SZ_32K; +module_param(sgl_threshold, uint, 0644); +MODULE_PARM_DESC(sgl_threshold, + "Use SGLs when average request segment size is larger or equal to " + "this size. Use 0 to disable SGLs."); + static int io_queue_depth_set(const char *val, const struct kernel_param *kp); static const struct kernel_param_ops io_queue_depth_ops = { .set = io_queue_depth_set, @@ -178,6 +186,7 @@ struct nvme_queue { struct nvme_iod { struct nvme_request req; struct nvme_queue *nvmeq; + bool use_sgl; int aborted; int npages; /* In the PRP list. 0 means small pool in use */ int nents; /* Used in scatterlist */ @@ -331,17 +340,35 @@ static int nvme_npages(unsigned size, struct nvme_dev *dev) return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8); } -static unsigned int nvme_iod_alloc_size(struct nvme_dev *dev, - unsigned int size, unsigned int nseg) +/* + * Calculates the number of pages needed for the SGL segments. For example a 4k + * page can accommodate 256 SGL descriptors. 
+ */ +static int nvme_pci_npages_sgl(unsigned int num_seg) +{ + return DIV_ROUND_UP(num_seg * sizeof(struct nvme_sgl_desc), PAGE_SIZE); +} + +static unsigned int nvme_pci_iod_alloc_size(struct nvme_dev *dev, + unsigned int size, unsigned int nseg, bool use_sgl) { - return sizeof(__le64 *) * nvme_npages(size, dev) + - sizeof(struct scatterlist) * nseg; + size_t alloc_size; + + if (use_sgl) + alloc_size = sizeof(__le64 *) * nvme_pci_npages_sgl(nseg); + else + alloc_size = sizeof(__le64 *) * nvme_npages(size, dev); + + return alloc_size + sizeof(struct scatterlist) * nseg; } -static unsigned int nvme_cmd_size(struct nvme_dev *dev) +static unsigned int nvme_pci_cmd_size(struct nvme_dev *dev, bool use_sgl) { - return sizeof(struct nvme_iod) + - nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES); + unsigned int alloc_size = nvme_pci_iod_alloc_size(dev, + NVME_INT_BYTES(dev), NVME_INT_PAGES, + use_sgl); + + return sizeof(struct nvme_iod) + alloc_size; } static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, @@ -425,10 +452,10 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq, nvmeq->sq_tail = tail; } -static __le64 **iod_list(struct request *req) +static void **nvme_pci_iod_list(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req)); + return (void **)(iod->sg + blk_rq_nr_phys_segments(req)); } static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev) @@ -438,7 +465,10 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev) unsigned int size = blk_rq_payload_bytes(rq); if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { - iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC); + size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg, + iod->use_sgl); + + iod->sg = kmalloc(alloc_size, GFP_ATOMIC); if (!iod->sg) return BLK_STS_RESOURCE; } else { @@ -456,18 +486,31 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev) static void nvme_free_iod(struct nvme_dev *dev, struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - const int last_prp = dev->ctrl.page_size / 8 - 1; + const int last_prp = dev->ctrl.page_size / sizeof(__le64) - 1; + dma_addr_t dma_addr = iod->first_dma, next_dma_addr; + int i; - __le64 **list = iod_list(req); - dma_addr_t prp_dma = iod->first_dma; if (iod->npages == 0) - dma_pool_free(dev->prp_small_pool, list[0], prp_dma); + dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], + dma_addr); + for (i = 0; i < iod->npages; i++) { - __le64 *prp_list = list[i]; - dma_addr_t next_prp_dma = le64_to_cpu(prp_list[last_prp]); - dma_pool_free(dev->prp_page_pool, prp_list, prp_dma); - prp_dma = next_prp_dma; + void *addr = nvme_pci_iod_list(req)[i]; + + if (iod->use_sgl) { + struct nvme_sgl_desc *sg_list = addr; + + next_dma_addr = + le64_to_cpu((sg_list[SGES_PER_PAGE - 1]).addr); + } else { + __le64 *prp_list = addr; + + next_dma_addr = le64_to_cpu(prp_list[last_prp]); + } + + dma_pool_free(dev->prp_page_pool, addr, dma_addr); + dma_addr = next_dma_addr; } if (iod->sg != iod->inline_sg) @@ -555,7 +598,8 @@ static void nvme_print_sgl(struct scatterlist *sgl, int nents) } } -static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req) +static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, + struct request *req, struct nvme_rw_command *cmnd) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct dma_pool *pool; @@ -566,14 +610,16 @@ static 
blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req) u32 page_size = dev->ctrl.page_size; int offset = dma_addr & (page_size - 1); __le64 *prp_list; - __le64 **list = iod_list(req); + void **list = nvme_pci_iod_list(req); dma_addr_t prp_dma; int nprps, i; + iod->use_sgl = false; + length -= (page_size - offset); if (length <= 0) { iod->first_dma = 0; - return BLK_STS_OK; + goto done; } dma_len -= (page_size - offset); @@ -587,7 +633,7 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req) if (length <= page_size) { iod->first_dma = dma_addr; - return BLK_STS_OK; + goto done; } nprps = DIV_ROUND_UP(length, page_size); @@ -634,6 +680,10 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req) dma_len = sg_dma_len(sg); } +done: + cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); + cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma); + return BLK_STS_OK; bad_sgl: @@ -643,6 +693,110 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req) return BLK_STS_IOERR; } +static void nvme_pci_sgl_set_data(struct nvme_sgl_desc *sge, + struct scatterlist *sg) +{ + sge->addr = cpu_to_le64(sg_dma_address(sg)); + sge->length = cpu_to_le32(sg_dma_len(sg)); + sge->type = NVME_SGL_FMT_DATA_DESC << 4; +} + +static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge, + dma_addr_t dma_addr, int entries) +{ + sge->addr = cpu_to_le64(dma_addr); + if (entries < SGES_PER_PAGE) { + sge->length = cpu_to_le32(entries * sizeof(*sge)); + sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4; + } else { + sge->length = cpu_to_le32(PAGE_SIZE); + sge->type = NVME_SGL_FMT_SEG_DESC << 4; + } +} + +static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, + struct request *req, struct nvme_rw_command *cmd) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + int length = blk_rq_payload_bytes(req); + struct dma_pool *pool; + struct nvme_sgl_desc *sg_list; + struct scatterlist *sg = iod->sg; + int entries = iod->nents, i = 0; + dma_addr_t sgl_dma; + + iod->use_sgl = true; + + /* setting the transfer type as SGL */ + cmd->flags = NVME_CMD_SGL_METABUF; + + if (length == sg_dma_len(sg)) { + nvme_pci_sgl_set_data(&cmd->dptr.sgl, sg); + return BLK_STS_OK; + } + + if (entries <= (256 / sizeof(struct nvme_sgl_desc))) { + pool = dev->prp_small_pool; + iod->npages = 0; + } else { + pool = dev->prp_page_pool; + iod->npages = 1; + } + + sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma); + if (!sg_list) { + iod->npages = -1; + return BLK_STS_RESOURCE; + } + + nvme_pci_iod_list(req)[0] = sg_list; + iod->first_dma = sgl_dma; + + nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries); + + do { + if (i == SGES_PER_PAGE) { + struct nvme_sgl_desc *old_sg_desc = sg_list; + struct nvme_sgl_desc *link = &old_sg_desc[i - 1]; + + sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma); + if (!sg_list) + return BLK_STS_RESOURCE; + + i = 0; + nvme_pci_iod_list(req)[iod->npages++] = sg_list; + sg_list[i++] = *link; + nvme_pci_sgl_set_seg(link, sgl_dma, entries); + } + + nvme_pci_sgl_set_data(&sg_list[i++], sg); + + length -= sg_dma_len(sg); + sg = sg_next(sg); + entries--; + } while (length > 0); + + WARN_ON(entries > 0); + return BLK_STS_OK; +} + +static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + unsigned int avg_seg_size; + + avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), + blk_rq_nr_phys_segments(req)); + + if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1)))) + return false; + if 
(!iod->nvmeq->qid) + return false; + if (!sgl_threshold || avg_seg_size < sgl_threshold) + return false; + return true; +} + static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, struct nvme_command *cmnd) { @@ -662,7 +816,11 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, DMA_ATTR_NO_WARN)) goto out; - ret = nvme_setup_prps(dev, req); + if (nvme_pci_use_sgls(dev, req)) + ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw); + else + ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); + if (ret != BLK_STS_OK) goto out_unmap; @@ -682,8 +840,6 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, goto out_unmap; } - cmnd->rw.dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma); if (blk_integrity_rq(req)) cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg)); return BLK_STS_OK; @@ -1379,7 +1535,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->admin_tagset.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1; dev->admin_tagset.timeout = ADMIN_TIMEOUT; dev->admin_tagset.numa_node = dev_to_node(dev->dev); - dev->admin_tagset.cmd_size = nvme_cmd_size(dev); + dev->admin_tagset.cmd_size = nvme_pci_cmd_size(dev, false); dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED; dev->admin_tagset.driver_data = dev; @@ -1906,7 +2062,11 @@ static int nvme_dev_add(struct nvme_dev *dev) dev->tagset.numa_node = dev_to_node(dev->dev); dev->tagset.queue_depth = min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; - dev->tagset.cmd_size = nvme_cmd_size(dev); + dev->tagset.cmd_size = nvme_pci_cmd_size(dev, false); + if ((dev->ctrl.sgls & ((1 << 0) | (1 << 1))) && sgl_threshold) { + dev->tagset.cmd_size = max(dev->tagset.cmd_size, + nvme_pci_cmd_size(dev, true)); + } dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; dev->tagset.driver_data = dev; -- cgit From 0a02e39fd1eb2ceb3dd0dc765cb2de4d09697a14 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 19 Oct 2017 16:11:38 -0700 Subject: nvme-fc: correct io termination handling The io completion handling for i/o's that are failing due to a transport error or association termination had issues, causing io failures (DNR set so retries didn't kick in) or long stalls. Change the io completion handler for the following items: When an io has been completed due to a transport abort (based on an exchange error) or when marked as aborted as part of an association termination (FCOP_FLAGS_TERMIO), set the NVME completion status to NVME_SC_ABORTED. By default, do not set DNR on the status so that a retry can be attempted after association recreate. In cases where an io is failed (non-successful nvme status including aborted), if the controller is being deleted (blk_queue_dying) or the io was part of the ios used for association creation (ctrl state is NEW or RECONNECTING), then additionally set the DNR bit so the io will not be retried. If the failed io was part of association creation, the failure will tear down the partially completed association and typically restart a new reconnect attempt (another create association later). Rearranged code flow to remove a largely unneeded local variable.
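The DNR bit matters because the host core consults it before requeueing a failed io; roughly (a simplified sketch with an invented name, not the exact core code):

    /* illustrative only: a failed io is retried unless the request
     * is marked no-retry or the status carries DNR */
    static bool example_needs_retry(struct request *req, u16 status)
    {
        if (blk_noretry_request(req))
            return false;
        if (status & NVME_SC_DNR)       /* Do Not Retry */
            return false;
        return true;
    }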
Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 0d0e898912ad..e1a7e5cdfcf4 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1387,7 +1387,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) struct nvme_command *sqe = &op->cmd_iu.sqe; __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); union nvme_result result; - bool complete_rq, terminate_assoc = true; + bool terminate_assoc = true; /* * WARNING: @@ -1429,8 +1429,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma, sizeof(op->rsp_iu), DMA_FROM_DEVICE); - if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) - status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); + if (atomic_read(&op->state) == FCPOP_STATE_ABORTED || + op->flags & FCOP_FLAGS_TERMIO) + status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); else if (freq->status) status = cpu_to_le16(NVME_SC_INTERNAL << 1); @@ -1494,23 +1495,27 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) done: if (op->flags & FCOP_FLAGS_AEN) { nvme_complete_async_event(&queue->ctrl->ctrl, status, &result); - complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); + __nvme_fc_fcpop_chk_teardowns(ctrl, op); atomic_set(&op->state, FCPOP_STATE_IDLE); op->flags = FCOP_FLAGS_AEN; /* clear other flags */ nvme_fc_ctrl_put(ctrl); goto check_error; } - complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); - if (!complete_rq) { - if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { - status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); - if (blk_queue_dying(rq->q)) - status |= cpu_to_le16(NVME_SC_DNR << 1); - } - nvme_end_request(rq, status, result); - } else + /* + * Force failures of commands if we're killing the controller + * or have an error on a command used to create an new association + */ + if (status && + (blk_queue_dying(rq->q) || + ctrl->ctrl.state == NVME_CTRL_NEW || + ctrl->ctrl.state == NVME_CTRL_RECONNECTING)) + status |= cpu_to_le16(NVME_SC_DNR << 1); + + if (__nvme_fc_fcpop_chk_teardowns(ctrl, op)) __nvme_fc_final_op_cleanup(rq); + else + nvme_end_request(rq, status, result); check_error: if (terminate_assoc) -- cgit From 134aedc9c157d49069e9a98636b0a917678586ee Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 19 Oct 2017 16:11:39 -0700 Subject: nvme-fc: correct io timeout behavior The transport io timeout behavior wasn't quite correct. It ignored that the io error handler is supposed to be synchronous, so it possibly allowed the blk request to be restarted while the associated io was still aborting. Timeouts on reserved commands, those used for association create, never timed out, so they hung forever. To correct: If an io times out while a remoteport is not connected, just restart the io timer. The lack of connectivity will simultaneously be resetting the controller, so the reset path will abort and terminate the io. If an io times out while it was marked for transport abort, just reset the io timer. The abort process is underway and will complete the io. Otherwise, if an io times out, abort the io. If the abort was unsuccessful (unlikely), give up and return not handled. If the abort was successful, as the abort process is underway it will terminate the io, so rather than synchronously waiting, just restart the io timer.
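Summarized as code, the new policy looks roughly like this (the helper names are invented; the return values are the real blk-mq ones of this era):

    /* illustrative only */
    static enum blk_eh_timer_return example_fc_timeout(struct request *rq)
    {
        if (example_port_offline(rq) || example_abort_pending(rq))
            return BLK_EH_RESET_TIMER;  /* recovery will finish it */
        if (example_abort_io(rq))
            return BLK_EH_NOT_HANDLED;  /* io wasn't active */
        example_start_error_recovery(rq);
        return BLK_EH_RESET_TIMER;  /* abort completion ends the io */
    }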
Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e1a7e5cdfcf4..c6c903f1b172 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1903,13 +1903,14 @@ nvme_fc_timeout(struct request *rq, bool reserved) struct nvme_fc_ctrl *ctrl = op->ctrl; int ret; - if (reserved) + if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || + atomic_read(&op->state) == FCPOP_STATE_ABORTED) return BLK_EH_RESET_TIMER; ret = __nvme_fc_abort_op(ctrl, op); if (ret) - /* io wasn't active to abort consider it done */ - return BLK_EH_HANDLED; + /* io wasn't active to abort */ + return BLK_EH_NOT_HANDLED; /* * we can't individually ABTS an io without affecting the queue, @@ -1920,7 +1921,12 @@ nvme_fc_timeout(struct request *rq, bool reserved) */ nvme_fc_error_recovery(ctrl, "io timeout error"); - return BLK_EH_HANDLED; + /* + * the io abort has been initiated. Have the reset timer + * restarted and the abort completion will complete the io + * shortly. Avoids a synchronous wait while the abort finishes. + */ + return BLK_EH_RESET_TIMER; } static int -- cgit From f87c89ad93c95857c4d4b19ca580e27994254b7f Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 23 Oct 2017 12:59:27 +0300 Subject: nvme-rdma: align nvme_rdma_device structure Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a5577e06a620..ff4635d317b5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -50,8 +50,8 @@ (NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS) struct nvme_rdma_device { - struct ib_device *dev; - struct ib_pd *pd; + struct ib_device *dev; + struct ib_pd *pd; struct kref ref; struct list_head entry; }; -- cgit From e62a538da2e7401bad5293d210ce00583ac12f39 Mon Sep 17 00:00:00 2001 From: Nitzan Carmi Date: Sun, 22 Oct 2017 09:37:04 +0000 Subject: nvme-rdma: Add debug message when reaches timeout Signed-off-by: Nitzan Carmi Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index ff4635d317b5..4552744eff45 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1578,6 +1578,10 @@ nvme_rdma_timeout(struct request *rq, bool reserved) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); + dev_warn(req->queue->ctrl->ctrl.device, + "I/O %d QID %d timeout, reset controller\n", + rq->tag, nvme_rdma_queue_idx(req->queue)); + /* queue error recovery */ nvme_rdma_error_recovery(req->queue->ctrl); -- cgit From 2dd4122854f697afc777582d18548dded03ce5dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Oct 2017 13:20:01 +0200 Subject: nvme: use kref_get_unless_zero in nvme_find_get_ns For kref_get_unless_zero to protect against lookup vs free races we need to use it in all places where we aren't guaranteed to already hold a reference. There is no such guarantee in nvme_find_get_ns, so switch to kref_get_unless_zero in this function. 
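To make the race concrete (the timeline is illustrative; the guarded get at the end is the pattern this patch adopts):

    /*
     * Without the guard, a lookup can race the final put:
     *
     *   lookup CPU                        release CPU
     *   finds ns on ctrl->namespaces
     *                                     kref_put() drops count to 0
     *   kref_get(): 0 -> 1 "resurrects"   nvme_free_ns() frees ns
     *   ... use after free ...
     *
     * kref_get_unless_zero() refuses the 0 -> 1 transition instead:
     */
    if (!kref_get_unless_zero(&ns->kref))
        continue;   /* ns is being freed; skip it */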
Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7fae42d595d5..1d931deac83b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2290,7 +2290,8 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) mutex_lock(&ctrl->namespaces_mutex); list_for_each_entry(ns, &ctrl->namespaces, list) { if (ns->ns_id == nsid) { - kref_get(&ns->kref); + if (!kref_get_unless_zero(&ns->kref)) + continue; ret = ns; break; } -- cgit From c6424a90da446cff6c67be06767fc0d0be787110 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Oct 2017 13:22:00 +0200 Subject: nvme: simplify nvme_open Now that we are protected against lookup vs free races for the namespace by using kref_get_unless_zero we don't need the hack of NULLing out the disk private data during removal. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn --- drivers/nvme/host/core.c | 40 ++++++++++------------------------------ 1 file changed, 10 insertions(+), 30 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1d931deac83b..9f8ae15c9fe8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -253,12 +253,6 @@ static void nvme_free_ns(struct kref *kref) if (ns->ndev) nvme_nvm_unregister(ns); - if (ns->disk) { - spin_lock(&dev_list_lock); - ns->disk->private_data = NULL; - spin_unlock(&dev_list_lock); - } - put_disk(ns->disk); ida_simple_remove(&ns->ctrl->ns_ida, ns->instance); nvme_put_ctrl(ns->ctrl); @@ -270,29 +264,6 @@ static void nvme_put_ns(struct nvme_ns *ns) kref_put(&ns->kref, nvme_free_ns); } -static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk) -{ - struct nvme_ns *ns; - - spin_lock(&dev_list_lock); - ns = disk->private_data; - if (ns) { - if (!kref_get_unless_zero(&ns->kref)) - goto fail; - if (!try_module_get(ns->ctrl->ops->module)) - goto fail_put_ns; - } - spin_unlock(&dev_list_lock); - - return ns; - -fail_put_ns: - kref_put(&ns->kref, nvme_free_ns); -fail: - spin_unlock(&dev_list_lock); - return NULL; -} - struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags, int qid) { @@ -1056,7 +1027,16 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, static int nvme_open(struct block_device *bdev, fmode_t mode) { - return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO; + struct nvme_ns *ns = bdev->bd_disk->private_data; + + if (!kref_get_unless_zero(&ns->kref)) + return -ENXIO; + if (!try_module_get(ns->ctrl->ops->module)) { + kref_put(&ns->kref, nvme_free_ns); + return -ENXIO; + } + + return 0; } static void nvme_release(struct gendisk *disk, fmode_t mode) -- cgit From d22524a4782a943bb02a9cf6885ac470210aabfc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Oct 2017 13:25:42 +0200 Subject: nvme: switch controller refcounting to use struct device Instead of allocating a separate struct device for the character device handle, embed it into struct nvme_ctrl and use it for the main controller refcounting. This removes double refcounting and gets us an automatic reference for the character device operations.
We keep ctrl->device as a pointer for now to avoid changing printks all over, but in the future we could look into message printing helpers that take a controller structure similar to what other subsystems do. Note the delete_ctrl operation always already has a reference (either through sysfs due to this change, or because every open file on the /dev/nvme-fabrics node has a reference) when it is entered now, so we don't need to do the unless_zero variant there. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke --- drivers/nvme/host/core.c | 43 ++++++++++++++++++++++--------------------- drivers/nvme/host/fc.c | 8 ++------ drivers/nvme/host/nvme.h | 12 +++++++++++- drivers/nvme/host/pci.c | 2 +- drivers/nvme/host/rdma.c | 5 ++--- drivers/nvme/target/loop.c | 2 +- 6 files changed, 39 insertions(+), 33 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9f8ae15c9fe8..3a97daa163f6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1915,7 +1915,7 @@ static int nvme_dev_open(struct inode *inode, struct file *file) ret = -EWOULDBLOCK; break; } - if (!kref_get_unless_zero(&ctrl->kref)) + if (!kobject_get_unless_zero(&ctrl->device->kobj)) break; file->private_data = ctrl; ret = 0; @@ -2374,7 +2374,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) list_add_tail(&ns->list, &ctrl->namespaces); mutex_unlock(&ctrl->namespaces_mutex); - kref_get(&ctrl->kref); + nvme_get_ctrl(ctrl); kfree(id); @@ -2703,7 +2703,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl); void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) { - device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance)); + device_del(ctrl->device); spin_lock(&dev_list_lock); list_del(&ctrl->node); @@ -2711,23 +2711,17 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_uninit_ctrl); -static void nvme_free_ctrl(struct kref *kref) +static void nvme_free_ctrl(struct device *dev) { - struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref); + struct nvme_ctrl *ctrl = + container_of(dev, struct nvme_ctrl, ctrl_device); - put_device(ctrl->device); ida_simple_remove(&nvme_instance_ida, ctrl->instance); ida_destroy(&ctrl->ns_ida); ctrl->ops->free_ctrl(ctrl); } -void nvme_put_ctrl(struct nvme_ctrl *ctrl) -{ - kref_put(&ctrl->kref, nvme_free_ctrl); -} -EXPORT_SYMBOL_GPL(nvme_put_ctrl); - /* * Initialize a NVMe controller structures.
This needs to be called during * earliest initialization so that we have the initialized structured around @@ -2742,7 +2736,6 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, spin_lock_init(&ctrl->lock); INIT_LIST_HEAD(&ctrl->namespaces); mutex_init(&ctrl->namespaces_mutex); - kref_init(&ctrl->kref); ctrl->dev = dev; ctrl->ops = ops; ctrl->quirks = quirks; @@ -2755,15 +2748,21 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, goto out; ctrl->instance = ret; - ctrl->device = device_create_with_groups(nvme_class, ctrl->dev, - MKDEV(nvme_char_major, ctrl->instance), - ctrl, nvme_dev_attr_groups, - "nvme%d", ctrl->instance); - if (IS_ERR(ctrl->device)) { - ret = PTR_ERR(ctrl->device); + device_initialize(&ctrl->ctrl_device); + ctrl->device = &ctrl->ctrl_device; + ctrl->device->devt = MKDEV(nvme_char_major, ctrl->instance); + ctrl->device->class = nvme_class; + ctrl->device->parent = ctrl->dev; + ctrl->device->groups = nvme_dev_attr_groups; + ctrl->device->release = nvme_free_ctrl; + dev_set_drvdata(ctrl->device, ctrl); + ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance); + if (ret) goto out_release_instance; - } - get_device(ctrl->device); + ret = device_add(ctrl->device); + if (ret) + goto out_free_name; + ida_init(&ctrl->ns_ida); spin_lock(&dev_list_lock); @@ -2779,6 +2778,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, min(default_ps_max_latency_us, (unsigned long)S32_MAX)); return 0; +out_free_name: + kfree_const(dev->kobj.name); out_release_instance: ida_simple_remove(&nvme_instance_ida, ctrl->instance); out: diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index c6c903f1b172..aa9aec6923bb 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2692,14 +2692,10 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); int ret; - if (!kref_get_unless_zero(&ctrl->ctrl.kref)) - return -EBUSY; - + nvme_get_ctrl(&ctrl->ctrl); ret = __nvme_fc_del_ctrl(ctrl); - if (!ret) flush_workqueue(nvme_wq); - nvme_put_ctrl(&ctrl->ctrl); return ret; @@ -2918,7 +2914,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, return ERR_PTR(ret); } - kref_get(&ctrl->ctrl.kref); + nvme_get_ctrl(&ctrl->ctrl); dev_info(ctrl->ctrl.device, "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index cb9d93048f3d..ae60d8342e60 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -127,12 +127,12 @@ struct nvme_ctrl { struct request_queue *admin_q; struct request_queue *connect_q; struct device *dev; - struct kref kref; int instance; struct blk_mq_tag_set *tagset; struct blk_mq_tag_set *admin_tagset; struct list_head namespaces; struct mutex namespaces_mutex; + struct device ctrl_device; struct device *device; /* char device */ struct list_head node; struct ida ns_ida; @@ -279,6 +279,16 @@ static inline void nvme_end_request(struct request *req, __le16 status, blk_mq_complete_request(req); } +static inline void nvme_get_ctrl(struct nvme_ctrl *ctrl) +{ + get_device(ctrl->device); +} + +static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl) +{ + put_device(ctrl->device); +} + void nvme_complete_rq(struct request *req); void nvme_cancel_request(struct request *req, void *data, bool reserved); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 11e6fd9d0ba4..7735571ffc9a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ 
-2292,7 +2292,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) { dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status); - kref_get(&dev->ctrl.kref); + nvme_get_ctrl(&dev->ctrl); nvme_dev_disable(dev, false); if (!schedule_work(&dev->remove_work)) nvme_put_ctrl(&dev->ctrl); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 4552744eff45..62b58f9b7d00 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1793,8 +1793,7 @@ static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl) * Keep a reference until all work is flushed since * __nvme_rdma_del_ctrl can free the ctrl mem */ - if (!kref_get_unless_zero(&ctrl->ctrl.kref)) - return -EBUSY; + nvme_get_ctrl(&ctrl->ctrl); ret = __nvme_rdma_del_ctrl(ctrl); if (!ret) flush_work(&ctrl->delete_work); @@ -1955,7 +1954,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", ctrl->ctrl.opts->subsysnqn, &ctrl->addr); - kref_get(&ctrl->ctrl.kref); + nvme_get_ctrl(&ctrl->ctrl); mutex_lock(&nvme_rdma_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index c56354e1e4c6..f83e925fe64a 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -642,7 +642,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, dev_info(ctrl->ctrl.device, "new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn); - kref_get(&ctrl->ctrl.kref); + nvme_get_ctrl(&ctrl->ctrl); changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); -- cgit From a6a5149b10ec8ab8b4a9479a8230265c1b573be0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Oct 2017 16:59:25 +0200 Subject: nvme: get rid of nvme_ctrl_list Use the core chrdev code to set up the link between the character device and the nvme controller. This allows us to get rid of the global list of all controllers, and also ensures that we have both a reference to the controller and the transport module before the open method of the character device is called. 
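In outline, the open path drops the minor-number list walk in favor of a container_of() on the inode's cdev, and cdev.owner pins the transport module automatically (condensed from the hunks below):

	/* registration: the cdev is embedded in the controller */
	cdev_init(&ctrl->cdev, &nvme_dev_fops);
	ctrl->cdev.owner = ops->module;	/* implicit module reference on open */
	ret = cdev_device_add(&ctrl->cdev, ctrl->device);

	/* open: no global controller list required anymore */
	struct nvme_ctrl *ctrl =
		container_of(inode->i_cdev, struct nvme_ctrl, cdev);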
Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke --- drivers/nvme/host/core.c | 76 ++++++++++-------------------------------------- drivers/nvme/host/nvme.h | 3 +- 2 files changed, 18 insertions(+), 61 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3a97daa163f6..a56a1e0432e7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -52,9 +52,6 @@ static u8 nvme_max_retries = 5; module_param_named(max_retries, nvme_max_retries, byte, 0644); MODULE_PARM_DESC(max_retries, "max number of retries a command may have"); -static int nvme_char_major; -module_param(nvme_char_major, int, 0); - static unsigned long default_ps_max_latency_us = 100000; module_param(default_ps_max_latency_us, ulong, 0644); MODULE_PARM_DESC(default_ps_max_latency_us, @@ -71,11 +68,8 @@ MODULE_PARM_DESC(streams, "turn on support for Streams write directives"); struct workqueue_struct *nvme_wq; EXPORT_SYMBOL_GPL(nvme_wq); -static LIST_HEAD(nvme_ctrl_list); -static DEFINE_SPINLOCK(dev_list_lock); - static DEFINE_IDA(nvme_instance_ida); - +static dev_t nvme_chr_devt; static struct class *nvme_class; static __le32 nvme_get_log_dw10(u8 lid, size_t size) @@ -1031,20 +1025,12 @@ static int nvme_open(struct block_device *bdev, fmode_t mode) if (!kref_get_unless_zero(&ns->kref)) return -ENXIO; - if (!try_module_get(ns->ctrl->ops->module)) { - kref_put(&ns->kref, nvme_free_ns); - return -ENXIO; - } - return 0; } static void nvme_release(struct gendisk *disk, fmode_t mode) { - struct nvme_ns *ns = disk->private_data; - - module_put(ns->ctrl->ops->module); - nvme_put_ns(ns); + nvme_put_ns(disk->private_data); } static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) @@ -1902,33 +1888,12 @@ EXPORT_SYMBOL_GPL(nvme_init_identify); static int nvme_dev_open(struct inode *inode, struct file *file) { - struct nvme_ctrl *ctrl; - int instance = iminor(inode); - int ret = -ENODEV; - - spin_lock(&dev_list_lock); - list_for_each_entry(ctrl, &nvme_ctrl_list, node) { - if (ctrl->instance != instance) - continue; - - if (!ctrl->admin_q) { - ret = -EWOULDBLOCK; - break; - } - if (!kobject_get_unless_zero(&ctrl->device->kobj)) - break; - file->private_data = ctrl; - ret = 0; - break; - } - spin_unlock(&dev_list_lock); - - return ret; -} + struct nvme_ctrl *ctrl = + container_of(inode->i_cdev, struct nvme_ctrl, cdev); -static int nvme_dev_release(struct inode *inode, struct file *file) -{ - nvme_put_ctrl(file->private_data); + if (!ctrl->admin_q) + return -EWOULDBLOCK; + file->private_data = ctrl; return 0; } @@ -1992,7 +1957,6 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd, static const struct file_operations nvme_dev_fops = { .owner = THIS_MODULE, .open = nvme_dev_open, - .release = nvme_dev_release, .unlocked_ioctl = nvme_dev_ioctl, .compat_ioctl = nvme_dev_ioctl, }; @@ -2703,11 +2667,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl); void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) { - device_del(ctrl->device); - - spin_lock(&dev_list_lock); - list_del(&ctrl->node); - spin_unlock(&dev_list_lock); + cdev_device_del(&ctrl->cdev, ctrl->device); } EXPORT_SYMBOL_GPL(nvme_uninit_ctrl); @@ -2750,7 +2710,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, device_initialize(&ctrl->ctrl_device); ctrl->device = &ctrl->ctrl_device; - ctrl->device->devt = MKDEV(nvme_char_major, ctrl->instance); + ctrl->device->devt = MKDEV(MAJOR(nvme_chr_devt), ctrl->instance); 
ctrl->device->class = nvme_class; ctrl->device->parent = ctrl->dev; ctrl->device->groups = nvme_dev_attr_groups; @@ -2759,16 +2719,15 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance); if (ret) goto out_release_instance; - ret = device_add(ctrl->device); + + cdev_init(&ctrl->cdev, &nvme_dev_fops); + ctrl->cdev.owner = ops->module; + ret = cdev_device_add(&ctrl->cdev, ctrl->device); if (ret) goto out_free_name; ida_init(&ctrl->ns_ida); - spin_lock(&dev_list_lock); - list_add_tail(&ctrl->node, &nvme_ctrl_list); - spin_unlock(&dev_list_lock); - /* * Initialize latency tolerance controls. The sysfs files won't * be visible to userspace unless the device actually supports APST. @@ -2909,12 +2868,9 @@ int __init nvme_core_init(void) if (!nvme_wq) return -ENOMEM; - result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme", - &nvme_dev_fops); + result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme"); if (result < 0) goto destroy_wq; - else if (result > 0) - nvme_char_major = result; nvme_class = class_create(THIS_MODULE, "nvme"); if (IS_ERR(nvme_class)) { @@ -2925,7 +2881,7 @@ int __init nvme_core_init(void) return 0; unregister_chrdev: - __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); + unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); destroy_wq: destroy_workqueue(nvme_wq); return result; @@ -2934,7 +2890,7 @@ destroy_wq: void nvme_core_exit(void) { class_destroy(nvme_class); - __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); + unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); destroy_workqueue(nvme_wq); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ae60d8342e60..1bb2bc165e54 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -15,6 +15,7 @@ #define _NVME_H #include +#include #include #include #include @@ -134,7 +135,7 @@ struct nvme_ctrl { struct mutex namespaces_mutex; struct device ctrl_device; struct device *device; /* char device */ - struct list_head node; + struct cdev cdev; struct ida ns_ida; struct work_struct reset_work; -- cgit From 999ada28713d7bcf4a2c70cbab47b08cd95f2cf8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Oct 2017 17:09:31 +0200 Subject: nvme: check for a live controller in nvme_dev_open This is a much more sensible check than just the admin queue. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a56a1e0432e7..df525ab42fcd 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1891,7 +1891,7 @@ static int nvme_dev_open(struct inode *inode, struct file *file) struct nvme_ctrl *ctrl = container_of(inode->i_cdev, struct nvme_ctrl, cdev); - if (!ctrl->admin_q) + if (ctrl->state != NVME_CTRL_LIVE) return -EWOULDBLOCK; file->private_data = ctrl; return 0; -- cgit From 3b3387620780fc9699021c85bdce5cb45a763d41 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 20 Oct 2017 16:17:06 -0700 Subject: nvme: add duplicate_connect option Add the "duplicate_connect" boolean option (presence means true). Default is false. When false, the transport should validate whether a new controller request is targeted for the same host transport addressing and target transport addressing as an existing controller. 
If so, the new controller request should be rejected. When true, the callee is explicitly requesting a duplicate controller connection to be made and the new request should be attempted. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 7 ++++++- drivers/nvme/host/fabrics.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 8bca36a46924..4a83137b0268 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -548,6 +548,7 @@ static const match_table_t opt_tokens = { { NVMF_OPT_HOSTNQN, "hostnqn=%s" }, { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" }, { NVMF_OPT_HOST_ID, "hostid=%s" }, + { NVMF_OPT_DUP_CONNECT, "duplicate_connect" }, { NVMF_OPT_ERR, NULL } }; @@ -566,6 +567,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->nr_io_queues = num_online_cpus(); opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY; opts->kato = NVME_DEFAULT_KATO; + opts->duplicate_connect = false; options = o = kstrdup(buf, GFP_KERNEL); if (!options) @@ -742,6 +744,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, goto out; } break; + case NVMF_OPT_DUP_CONNECT: + opts->duplicate_connect = true; + break; default: pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", p); @@ -823,7 +828,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options); #define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN) #define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \ NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \ - NVMF_OPT_HOST_ID) + NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT) static struct nvme_ctrl * nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index bf33663218cd..a7590cc59ba2 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -57,6 +57,7 @@ enum { NVMF_OPT_HOST_TRADDR = 1 << 10, NVMF_OPT_CTRL_LOSS_TMO = 1 << 11, NVMF_OPT_HOST_ID = 1 << 12, + NVMF_OPT_DUP_CONNECT = 1 << 13, }; /** @@ -96,6 +97,7 @@ struct nvmf_ctrl_options { unsigned int nr_io_queues; unsigned int reconnect_delay; bool discovery_nqn; + bool duplicate_connect; unsigned int kato; struct nvmf_host *host; int max_reconnects; -- cgit From 991231dc48ae9bcdd363ee231f10beb771d455c1 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 20 Oct 2017 16:17:07 -0700 Subject: nvme: add helper to compare options to controller Adds a helper function that compares the host and subsystem specified in a connect options list vs a controller.
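A transport is expected to walk its own controller list under its own lock and apply the helper to each entry; a sketch of the intended call pattern, where transport_ctrl_list and transport_lock stand in for the transport's real list and lock (the rdma and fc patches that follow do exactly this):

	bool found = false;

	mutex_lock(&transport_lock);
	list_for_each_entry(ctrl, &transport_ctrl_list, list) {
		/* same subsysnqn, same hostnqn and same host id? */
		found = nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts);
		if (found)
			break;
	}
	mutex_unlock(&transport_lock);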
Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index a7590cc59ba2..42232e731f19 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -133,6 +133,18 @@ struct nvmf_transport_ops { struct nvmf_ctrl_options *opts); }; +static inline bool +nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, + struct nvmf_ctrl_options *opts) +{ + if (strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || + strcmp(opts->host->nqn, ctrl->opts->host->nqn) || + memcmp(&opts->host->id, &ctrl->opts->host->id, sizeof(uuid_t))) + return false; + + return true; +} + int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val); int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val); int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val); -- cgit From 36e835f2432d8f16845307b5fb7e88d1e5af041d Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 20 Oct 2017 16:17:09 -0700 Subject: nvme-rdma: add support for duplicate_connect option Adds support for the duplicate_connect option. When set to true, checks whether there's an existing controller via the same target address (traddr), target port (trsvcid), and if specified, host address (host_traddr). Fails the connection request if there is an existing controller. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 62b58f9b7d00..32b0a9ef26e6 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1851,6 +1851,83 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .reinit_request = nvme_rdma_reinit_request, }; +static inline bool +__nvme_rdma_options_match(struct nvme_rdma_ctrl *ctrl, + struct nvmf_ctrl_options *opts) +{ + char *stdport = __stringify(NVME_RDMA_IP_PORT); + + + if (!nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts) || + strcmp(opts->traddr, ctrl->ctrl.opts->traddr)) + return false; + + if (opts->mask & NVMF_OPT_TRSVCID && + ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) { + if (strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid)) + return false; + } else if (opts->mask & NVMF_OPT_TRSVCID) { + if (strcmp(opts->trsvcid, stdport)) + return false; + } else if (ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) { + if (strcmp(stdport, ctrl->ctrl.opts->trsvcid)) + return false; + } + /* else, it's a match as both have stdport. Fall to next checks */ + + /* + * checking the local address is rough. In most cases, one + * is not specified and the host port is selected by the stack. + * + * Assume no match if: + * local address is specified and address is not the same + * local address is not specified but remote is, or vice versa + * (admin using specific host_traddr when it matters). + */ + if (opts->mask & NVMF_OPT_HOST_TRADDR && + ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) { + if (strcmp(opts->host_traddr, ctrl->ctrl.opts->host_traddr)) + return false; + } else if (opts->mask & NVMF_OPT_HOST_TRADDR || + ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) + return false; + /* + * if neither controller had an host port specified, assume it's + * a match as everything else matched. 
+ */ + + return true; +} + +/* + * Fails a connection request if it matches an existing controller + * (association) with the same tuple: + * + * + * if local address is not specified in the request, it will match an + * existing controller with all the other parameters the same and no + * local port address specified as well. + * + * The ports don't need to be compared as they are intrinsically + * already matched by the port pointers supplied. + */ +static bool +nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts) +{ + struct nvme_rdma_ctrl *ctrl; + bool found = false; + + mutex_lock(&nvme_rdma_ctrl_mutex); + list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { + found = __nvme_rdma_options_match(ctrl, opts); + if (found) + break; + } + mutex_unlock(&nvme_rdma_ctrl_mutex); + + return found; +} + static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { @@ -1887,6 +1964,11 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, } } + if (!opts->duplicate_connect && nvme_rdma_existing_controller(opts)) { + ret = -EALREADY; + goto out_free_ctrl; + } + ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops, 0 /* no quirks, we're perfect! */); if (ret) -- cgit From 56d5f4f108efd4e439d2320738e2b894af0920bb Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 20 Oct 2017 16:17:08 -0700 Subject: nvme-fc: add support for duplicate_connect option Adds support for the duplicate_connect option. When set to true, checks whether there's an existing controller via the same host port and target port for the same host (hostnqn, hostid) to the same subsystem. Fails the connection request if there is an existing controller. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index aa9aec6923bb..d12775d12983 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2792,6 +2792,33 @@ static const struct blk_mq_ops nvme_fc_admin_mq_ops = { }; +/* + * Fails a controller request if it matches an existing controller + * (association) with the same tuple: + * + * + * The ports don't need to be compared as they are intrinsically + * already matched by the port pointers supplied.
+ */ +static bool +nvme_fc_existing_controller(struct nvme_fc_rport *rport, + struct nvmf_ctrl_options *opts) +{ + struct nvme_fc_ctrl *ctrl; + unsigned long flags; + bool found = false; + + spin_lock_irqsave(&rport->lock, flags); + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { + found = nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts); + if (found) + break; + } + spin_unlock_irqrestore(&rport->lock, flags); + + return found; +} + static struct nvme_ctrl * nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) @@ -2806,6 +2833,12 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, goto out_fail; } + if (!opts->duplicate_connect && + nvme_fc_existing_controller(rport, opts)) { + ret = -EALREADY; + goto out_fail; + } + ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) { ret = -ENOMEM; -- cgit From ecad0d2cb8a7997afdc95031ee3328b997aba5c4 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 23 Oct 2017 15:11:36 -0700 Subject: nvme-fc: remove NVME_FC_MAX_SEGMENTS The define is an arbitrary limit to the io size on the initiator, capping the io to 1MB-4KB. Remove the define from the transport. I/O size will solely be limited by the LLDD sg limits. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index d12775d12983..b600c07803cf 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2435,7 +2435,6 @@ static int nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; - u32 segs; int ret; bool changed; @@ -2486,9 +2485,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) if (ret) goto out_disconnect_admin_queue; - segs = min_t(u32, NVME_FC_MAX_SEGMENTS, - ctrl->lport->ops->max_sgl_segments); - ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9); + ctrl->ctrl.max_hw_sectors = + (ctrl->lport->ops->max_sgl_segments - 1) << (PAGE_SHIFT - 9); ret = nvme_init_identify(&ctrl->ctrl); if (ret) -- cgit From 71c691fd06cc2625966620c93ce21bdcce32ed95 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 28 Sep 2017 09:56:31 -0700 Subject: nvme-fc: avoid workqueue flush stalls There's no need to wait for the full nvme_wq, which is now shared, to flush. Flush only the delete_work item. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b600c07803cf..50cc17e99475 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2693,7 +2693,7 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) nvme_get_ctrl(&ctrl->ctrl); ret = __nvme_fc_del_ctrl(ctrl); if (!ret) - flush_workqueue(nvme_wq); + flush_work(&ctrl->delete_work); nvme_put_ctrl(&ctrl->ctrl); return ret; -- cgit From 29c09648734b94e42802f021658a47361169403b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 29 Oct 2017 10:44:28 +0200 Subject: nvme-fc: merge __nvme_fc_schedule_delete_work into __nvme_fc_del_ctrl No need to have two functions doing the same thing.
Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 50cc17e99475..827e9efbe556 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2663,22 +2663,14 @@ nvme_fc_delete_ctrl_work(struct work_struct *work) nvme_put_ctrl(&ctrl->ctrl); } -static bool -__nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl) -{ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return true; - - if (!queue_work(nvme_wq, &ctrl->delete_work)) - return true; - - return false; -} - static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) { - return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0; + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) + return -EBUSY; + if (!queue_work(nvme_wq, &ctrl->delete_work)) + return -EBUSY; + return 0; } /* @@ -2724,7 +2716,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) "NVME-FC{%d}: Max reconnect attempts (%d) " "reached. Removing controller\n", ctrl->cnum, ctrl->ctrl.nr_reconnects); - WARN_ON(__nvme_fc_schedule_delete_work(ctrl)); + WARN_ON(__nvme_fc_del_ctrl(ctrl)); } } -- cgit From c5017e85705bfea721732e153305d1988ff965c2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 29 Oct 2017 10:44:29 +0200 Subject: nvme: move controller deletion to common code Move the ->delete_work and the associated helpers to common code instead of duplicating them in every driver. This also adds the missing reference get/put for the loop driver. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 38 ++++++++++++++++++++++++++++++++++- drivers/nvme/host/fabrics.c | 2 +- drivers/nvme/host/fc.c | 42 +++++---------------------------------- drivers/nvme/host/nvme.h | 5 ++++- drivers/nvme/host/rdma.c | 45 ++++++------------------------------------ drivers/nvme/target/loop.c | 48 +++++++++------------------------------------ 6 files changed, 62 insertions(+), 118 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index df525ab42fcd..d835ac05bbf7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -97,6 +97,41 @@ static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) return ret; } +static void nvme_delete_ctrl_work(struct work_struct *work) +{ + struct nvme_ctrl *ctrl = + container_of(work, struct nvme_ctrl, delete_work); + + ctrl->ops->delete_ctrl(ctrl); +} + +int nvme_delete_ctrl(struct nvme_ctrl *ctrl) +{ + if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) + return -EBUSY; + if (!queue_work(nvme_wq, &ctrl->delete_work)) + return -EBUSY; + return 0; +} +EXPORT_SYMBOL_GPL(nvme_delete_ctrl); + +int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) +{ + int ret = 0; + + /* + * Keep a reference until the work is flushed since ->delete_ctrl + * can free the controller. 
+ */ + nvme_get_ctrl(ctrl); + ret = nvme_delete_ctrl(ctrl); + if (!ret) + flush_work(&ctrl->delete_work); + nvme_put_ctrl(ctrl); + return ret; +} +EXPORT_SYMBOL_GPL(nvme_delete_ctrl_sync); + static blk_status_t nvme_error_status(struct request *req) { switch (nvme_req(req)->status & 0x7ff) { @@ -2122,7 +2157,7 @@ static ssize_t nvme_sysfs_delete(struct device *dev, struct nvme_ctrl *ctrl = dev_get_drvdata(dev); if (device_remove_file_self(dev, attr)) - ctrl->ops->delete_ctrl(ctrl); + nvme_delete_ctrl_sync(ctrl); return count; } static DEVICE_ATTR(delete_controller, S_IWUSR, NULL, nvme_sysfs_delete); @@ -2702,6 +2737,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, INIT_WORK(&ctrl->scan_work, nvme_scan_work); INIT_WORK(&ctrl->async_event_work, nvme_async_event_work); INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work); + INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work); ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL); if (ret < 0) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 4a83137b0268..a4967de5bb25 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -887,7 +887,7 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) "controller returned incorrect NQN: \"%s\".\n", ctrl->subnqn); up_read(&nvmf_transports_rwsem); - ctrl->ops->delete_ctrl(ctrl); + nvme_delete_ctrl_sync(ctrl); return ERR_PTR(-EINVAL); } diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 827e9efbe556..a7bdb17de29d 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -157,7 +157,6 @@ struct nvme_fc_ctrl { struct blk_mq_tag_set admin_tag_set; struct blk_mq_tag_set tag_set; - struct work_struct delete_work; struct delayed_work connect_work; struct kref ref; @@ -223,7 +222,6 @@ static struct device *fc_udev_device; /* *********************** FC-NVME Port Management ************************ */ -static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *); static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, struct nvme_fc_queue *, unsigned int); @@ -662,7 +660,7 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) /* tear down all associations to the remote port */ list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) - __nvme_fc_del_ctrl(ctrl); + nvme_delete_ctrl(&ctrl->ctrl); spin_unlock_irqrestore(&rport->lock, flags); @@ -2636,10 +2634,9 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) } static void -nvme_fc_delete_ctrl_work(struct work_struct *work) +nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) { - struct nvme_fc_ctrl *ctrl = - container_of(work, struct nvme_fc_ctrl, delete_work); + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); cancel_work_sync(&ctrl->ctrl.reset_work); cancel_delayed_work_sync(&ctrl->connect_work); @@ -2663,34 +2660,6 @@ nvme_fc_delete_ctrl_work(struct work_struct *work) nvme_put_ctrl(&ctrl->ctrl); } -static int -__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) -{ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return -EBUSY; - if (!queue_work(nvme_wq, &ctrl->delete_work)) - return -EBUSY; - return 0; -} - -/* - * Request from nvme core layer to delete the controller - */ -static int -nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) -{ - struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - int ret; - - nvme_get_ctrl(&ctrl->ctrl); - ret = __nvme_fc_del_ctrl(ctrl); - if (!ret) - flush_work(&ctrl->delete_work); - nvme_put_ctrl(&ctrl->ctrl); - - return ret; -} - static void nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) { @@ 
-2716,7 +2685,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) "NVME-FC{%d}: Max reconnect attempts (%d) " "reached. Removing controller\n", ctrl->cnum, ctrl->ctrl.nr_reconnects); - WARN_ON(__nvme_fc_del_ctrl(ctrl)); + WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); } } @@ -2748,7 +2717,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .reg_write32 = nvmf_reg_write32, .free_ctrl = nvme_fc_nvme_ctrl_freed, .submit_async_event = nvme_fc_submit_async_event, - .delete_ctrl = nvme_fc_del_nvme_ctrl, + .delete_ctrl = nvme_fc_delete_ctrl, .get_address = nvmf_get_address, .reinit_request = nvme_fc_reinit_request, }; @@ -2851,7 +2820,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, get_device(ctrl->dev); kref_init(&ctrl->ref); - INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); spin_lock_init(&ctrl->lock); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1bb2bc165e54..35d9cee515f1 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -138,6 +138,7 @@ struct nvme_ctrl { struct cdev cdev; struct ida ns_ida; struct work_struct reset_work; + struct work_struct delete_work; struct opal_dev *opal_dev; @@ -236,7 +237,7 @@ struct nvme_ctrl_ops { int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); void (*free_ctrl)(struct nvme_ctrl *ctrl); void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx); - int (*delete_ctrl)(struct nvme_ctrl *ctrl); + void (*delete_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); int (*reinit_request)(void *data, struct request *rq); }; @@ -339,6 +340,8 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_start_keep_alive(struct nvme_ctrl *ctrl); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); +int nvme_delete_ctrl(struct nvme_ctrl *ctrl); +int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); #ifdef CONFIG_NVM int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 32b0a9ef26e6..5175b465997d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -105,7 +105,6 @@ struct nvme_rdma_ctrl { /* other member variables */ struct blk_mq_tag_set tag_set; - struct work_struct delete_work; struct work_struct err_work; struct nvme_rdma_qe async_event_sqe; @@ -913,7 +912,7 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) ctrl->ctrl.opts->reconnect_delay * HZ); } else { dev_info(ctrl->ctrl.device, "Removing controller...\n"); - queue_work(nvme_wq, &ctrl->delete_work); + queue_work(nvme_wq, &ctrl->ctrl.delete_work); } } @@ -1764,41 +1763,10 @@ static void nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl) nvme_put_ctrl(&ctrl->ctrl); } -static void nvme_rdma_del_ctrl_work(struct work_struct *work) +static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl) { - struct nvme_rdma_ctrl *ctrl = container_of(work, - struct nvme_rdma_ctrl, delete_work); - - nvme_stop_ctrl(&ctrl->ctrl); - nvme_rdma_remove_ctrl(ctrl); -} - -static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl) -{ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return -EBUSY; - - if (!queue_work(nvme_wq, &ctrl->delete_work)) - return -EBUSY; - - return 0; -} - -static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl) -{ - struct nvme_rdma_ctrl 
*ctrl = to_rdma_ctrl(nctrl); - int ret = 0; - - /* - * Keep a reference until all work is flushed since - * __nvme_rdma_del_ctrl can free the ctrl mem - */ - nvme_get_ctrl(&ctrl->ctrl); - ret = __nvme_rdma_del_ctrl(ctrl); - if (!ret) - flush_work(&ctrl->delete_work); - nvme_put_ctrl(&ctrl->ctrl); - return ret; + nvme_stop_ctrl(ctrl); + nvme_rdma_remove_ctrl(to_rdma_ctrl(ctrl)); } static void nvme_rdma_reset_ctrl_work(struct work_struct *work) @@ -1846,7 +1814,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .reg_write32 = nvmf_reg_write32, .free_ctrl = nvme_rdma_free_ctrl, .submit_async_event = nvme_rdma_submit_async_event, - .delete_ctrl = nvme_rdma_del_ctrl, + .delete_ctrl = nvme_rdma_delete_ctrl, .get_address = nvmf_get_address, .reinit_request = nvme_rdma_reinit_request, }; @@ -1977,7 +1945,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, INIT_DELAYED_WORK(&ctrl->reconnect_work, nvme_rdma_reconnect_ctrl_work); INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); - INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work); ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */ @@ -2081,7 +2048,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) dev_info(ctrl->ctrl.device, "Removing ctrl: NQN \"%s\", addr %pISp\n", ctrl->ctrl.opts->subsysnqn, &ctrl->addr); - __nvme_rdma_del_ctrl(ctrl); + nvme_delete_ctrl(&ctrl->ctrl); } mutex_unlock(&nvme_rdma_ctrl_mutex); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index f83e925fe64a..7f9f3fc3fb2a 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -53,7 +53,6 @@ struct nvme_loop_ctrl { struct nvme_ctrl ctrl; struct nvmet_ctrl *target_ctrl; - struct work_struct delete_work; }; static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) @@ -439,41 +438,13 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) nvme_loop_destroy_admin_queue(ctrl); } -static void nvme_loop_del_ctrl_work(struct work_struct *work) +static void nvme_loop_delete_ctrl_host(struct nvme_ctrl *ctrl) { - struct nvme_loop_ctrl *ctrl = container_of(work, - struct nvme_loop_ctrl, delete_work); - - nvme_stop_ctrl(&ctrl->ctrl); - nvme_remove_namespaces(&ctrl->ctrl); - nvme_loop_shutdown_ctrl(ctrl); - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); -} - -static int __nvme_loop_del_ctrl(struct nvme_loop_ctrl *ctrl) -{ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return -EBUSY; - - if (!queue_work(nvme_wq, &ctrl->delete_work)) - return -EBUSY; - - return 0; -} - -static int nvme_loop_del_ctrl(struct nvme_ctrl *nctrl) -{ - struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl); - int ret; - - ret = __nvme_loop_del_ctrl(ctrl); - if (ret) - return ret; - - flush_work(&ctrl->delete_work); - - return 0; + nvme_stop_ctrl(ctrl); + nvme_remove_namespaces(ctrl); + nvme_loop_shutdown_ctrl(to_loop_ctrl(ctrl)); + nvme_uninit_ctrl(ctrl); + nvme_put_ctrl(ctrl); } static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl) @@ -483,7 +454,7 @@ static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl) mutex_lock(&nvme_loop_ctrl_mutex); list_for_each_entry(ctrl, &nvme_loop_ctrl_list, list) { if (ctrl->ctrl.cntlid == nctrl->cntlid) - __nvme_loop_del_ctrl(ctrl); + nvme_delete_ctrl(&ctrl->ctrl); } mutex_unlock(&nvme_loop_ctrl_mutex); } @@ -539,7 +510,7 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { .reg_write32 = nvmf_reg_write32, .free_ctrl 
= nvme_loop_free_ctrl, .submit_async_event = nvme_loop_submit_async_event, - .delete_ctrl = nvme_loop_del_ctrl, + .delete_ctrl = nvme_loop_delete_ctrl_host, }; static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) @@ -601,7 +572,6 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->list); - INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_loop_reset_ctrl_work); ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops, @@ -731,7 +701,7 @@ static void __exit nvme_loop_cleanup_module(void) mutex_lock(&nvme_loop_ctrl_mutex); list_for_each_entry_safe(ctrl, next, &nvme_loop_ctrl_list, list) - __nvme_loop_del_ctrl(ctrl); + nvme_delete_ctrl(&ctrl->ctrl); mutex_unlock(&nvme_loop_ctrl_mutex); flush_workqueue(nvme_wq); -- cgit From e9bc25874c0bde47b65c58ccd01e339a603a7f40 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 29 Oct 2017 10:44:30 +0200 Subject: nvme-rdma: remove nvme_rdma_remove_ctrl It is only used in two places, and some of the work done by it will be taken into common code soon. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 5175b465997d..a3521b852ea8 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1755,18 +1755,13 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) nvme_rdma_destroy_admin_queue(ctrl, shutdown); } -static void nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl) -{ - nvme_remove_namespaces(&ctrl->ctrl); - nvme_rdma_shutdown_ctrl(ctrl, true); - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); -} - static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl) { nvme_stop_ctrl(ctrl); - nvme_rdma_remove_ctrl(to_rdma_ctrl(ctrl)); + nvme_remove_namespaces(ctrl); + nvme_rdma_shutdown_ctrl(to_rdma_ctrl(ctrl), true); + nvme_uninit_ctrl(ctrl); + nvme_put_ctrl(ctrl); } static void nvme_rdma_reset_ctrl_work(struct work_struct *work) @@ -1802,7 +1797,10 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) out_fail: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); - nvme_rdma_remove_ctrl(ctrl); + nvme_remove_namespaces(&ctrl->ctrl); + nvme_rdma_shutdown_ctrl(ctrl, true); + nvme_uninit_ctrl(&ctrl->ctrl); + nvme_put_ctrl(&ctrl->ctrl); } static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { -- cgit From 6cd53d14aaa006b5543f06fbf5e1680ce61c6c6e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 29 Oct 2017 10:44:31 +0200 Subject: nvme: consolidate common code from ->reset_work No change in behavior except that the FC code cancels two work items a little later now. 
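With the consolidation, the common delete work owns the whole sequence and the per-transport ->delete_ctrl callback shrinks to association teardown; condensed from the core.c hunk below:

	static void nvme_delete_ctrl_work(struct work_struct *work)
	{
		struct nvme_ctrl *ctrl =
			container_of(work, struct nvme_ctrl, delete_work);

		nvme_stop_ctrl(ctrl);
		nvme_remove_namespaces(ctrl);
		ctrl->ops->delete_ctrl(ctrl);	/* transport-specific teardown only */
		nvme_uninit_ctrl(ctrl);
		nvme_put_ctrl(ctrl);
	}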
Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 ++++ drivers/nvme/host/fc.c | 13 ------------- drivers/nvme/host/rdma.c | 4 ---- drivers/nvme/target/loop.c | 4 ---- 4 files changed, 4 insertions(+), 21 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d835ac05bbf7..4fa748c9a3f6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -102,7 +102,11 @@ static void nvme_delete_ctrl_work(struct work_struct *work) struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, delete_work); + nvme_stop_ctrl(ctrl); + nvme_remove_namespaces(ctrl); ctrl->ops->delete_ctrl(ctrl); + nvme_uninit_ctrl(ctrl); + nvme_put_ctrl(ctrl); } int nvme_delete_ctrl(struct nvme_ctrl *ctrl) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index a7bdb17de29d..e447b532b9ee 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2640,24 +2640,11 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) cancel_work_sync(&ctrl->ctrl.reset_work); cancel_delayed_work_sync(&ctrl->connect_work); - nvme_stop_ctrl(&ctrl->ctrl); - nvme_remove_namespaces(&ctrl->ctrl); /* * kill the association on the link side. this will block * waiting for io to terminate */ nvme_fc_delete_association(ctrl); - - /* - * tear down the controller - * After the last reference on the nvme ctrl is removed, - * the transport nvme_fc_nvme_ctrl_freed() callback will be - * invoked. From there, the transport will tear down it's - * logical queues and association. - */ - nvme_uninit_ctrl(&ctrl->ctrl); - - nvme_put_ctrl(&ctrl->ctrl); } static void diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a3521b852ea8..ed6e05018a92 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1757,11 +1757,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl) { - nvme_stop_ctrl(ctrl); - nvme_remove_namespaces(ctrl); nvme_rdma_shutdown_ctrl(to_rdma_ctrl(ctrl), true); - nvme_uninit_ctrl(ctrl); - nvme_put_ctrl(ctrl); } static void nvme_rdma_reset_ctrl_work(struct work_struct *work) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 7f9f3fc3fb2a..bc95c6ed531a 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -440,11 +440,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) static void nvme_loop_delete_ctrl_host(struct nvme_ctrl *ctrl) { - nvme_stop_ctrl(ctrl); - nvme_remove_namespaces(ctrl); nvme_loop_shutdown_ctrl(to_loop_ctrl(ctrl)); - nvme_uninit_ctrl(ctrl); - nvme_put_ctrl(ctrl); } static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl) -- cgit From 12fa1304badde26c5cf495d5af48df22a31c3a52 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 29 Oct 2017 14:21:01 +0200 Subject: nvme-rdma: reuse nvme_delete_ctrl when reconnect attempts expire instead of just queueing delete work Reported-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index ed6e05018a92..03644ecf68d2 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -912,7 +912,7 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) ctrl->ctrl.opts->reconnect_delay * HZ); } else { 
dev_info(ctrl->ctrl.device, "Removing controller...\n"); - queue_work(nvme_wq, &ctrl->ctrl.delete_work); + nvme_delete_ctrl(&ctrl->ctrl); } } -- cgit From 4054637c9b4fbe9feef0cf6f2516ef00d8053560 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 29 Oct 2017 14:21:02 +0200 Subject: nvme: flush reset_work before safely continuing with delete operation Prevent racing controller reset and delete flows. reset_work must not ever self-requeue so flushing it suffices. Reported-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/fc.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4fa748c9a3f6..003314eb6341 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -102,6 +102,7 @@ static void nvme_delete_ctrl_work(struct work_struct *work) struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, delete_work); + flush_work(&ctrl->reset_work); nvme_stop_ctrl(ctrl); nvme_remove_namespaces(ctrl); ctrl->ops->delete_ctrl(ctrl); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e447b532b9ee..6a025a8d8c32 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2638,7 +2638,6 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - cancel_work_sync(&ctrl->ctrl.reset_work); cancel_delayed_work_sync(&ctrl->connect_work); /* * kill the association on the link side. this will block * waiting for io to terminate */ nvme_fc_delete_association(ctrl); -- cgit From 44c6ec77e12c387aaba420b30a54b94966f0d9e8 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 25 Oct 2017 16:43:14 -0700 Subject: nvme-fc: change ctlr state assignments during reset/reconnect Clean up some of the controller state checks and add the RESETTING->RECONNECTING state transition. Specifically: - the movement of the RESETTING state change and schedule of reset_work to core doesn't work with nvme_fc_error_recovery setting state to RECONNECTING before attempting to reset. Remove the state change as the reset request does it. - In the rare cases where an error occurs right as we're transitioning to LIVE, defer the controller start actions. - In error handling on teardown of associations while performing initial controller creation - avoid quiesce calls on the admin_q. They are unneeded. - Add the RESETTING->RECONNECTING transition in the reset handler, as sketched below.
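The resulting reset handler ordering, condensed from the fc.c hunk below:

	nvme_stop_ctrl(&ctrl->ctrl);

	/* will block waiting for io to terminate */
	nvme_fc_delete_association(ctrl);

	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
		dev_err(ctrl->ctrl.device,
			"NVME-FC{%d}: error_recovery: Couldn't change state "
			"to RECONNECTING\n", ctrl->cnum);
		return;
	}

	ret = nvme_fc_create_association(ctrl);
	if (ret)
		nvme_fc_reconnect_or_delete(ctrl, ret);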
Signed-off-by: James Smart Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 6a025a8d8c32..e37c69f7921d 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1884,13 +1884,6 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: resetting controller\n", ctrl->cnum); - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { - dev_err(ctrl->ctrl.device, - "NVME-FC{%d}: error_recovery: Couldn't change state " - "to RECONNECTING\n", ctrl->cnum); - return; - } - nvme_reset_ctrl(&ctrl->ctrl); } @@ -2528,11 +2521,11 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); - WARN_ON_ONCE(!changed); ctrl->ctrl.nr_reconnects = 0; - nvme_start_ctrl(&ctrl->ctrl); + if (changed) + nvme_start_ctrl(&ctrl->ctrl); return 0; /* Success */ @@ -2600,7 +2593,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) * use blk_mq_tagset_busy_itr() and the transport routine to * terminate the exchanges. */ - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + if (ctrl->ctrl.state != NVME_CTRL_NEW) + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); @@ -2649,12 +2643,8 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) static void nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) { - /* If we are resetting/deleting then do nothing */ - if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) { - WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || - ctrl->ctrl.state == NVME_CTRL_LIVE); + if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) return; - } dev_info(ctrl->ctrl.device, "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", @@ -2683,9 +2673,17 @@ nvme_fc_reset_ctrl_work(struct work_struct *work) int ret; nvme_stop_ctrl(&ctrl->ctrl); + /* will block will waiting for io to terminate */ nvme_fc_delete_association(ctrl); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: error_recovery: Couldn't change state " + "to RECONNECTING\n", ctrl->cnum); + return; + } + ret = nvme_fc_create_association(ctrl); if (ret) nvme_fc_reconnect_or_delete(ctrl, ret); -- cgit From ac7fe82b6fcf77e757e88005c33b8147c1b7b73f Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 25 Oct 2017 16:43:15 -0700 Subject: nvme-fc: add a dev_loss_tmo field to the remoteport Add a dev_loss_tmo value, paralleling the SCSI FC transport, for device connectivity loss. The transport initializes the value in the nvme_fc_register_remoteport() call. If the value is not set, a default of 60s is set. Add a new routine to the api, nvme_fc_set_remoteport_devloss(), which allows the lldd to dynamically update the value on an existing remoteport.
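From the LLDD side the new call would be used along these lines; the wrapper, the lld_rport structure and its nvme_rport field are illustrative assumptions, only nvme_fc_set_remoteport_devloss() itself comes from the patch:

	/* hypothetical LLDD hook, e.g. behind a sysfs attribute */
	static int lld_set_nvme_dev_loss(struct lld_rport *lld_rport, u32 secs)
	{
		/* returns -EINVAL once the remoteport is no longer online */
		return nvme_fc_set_remoteport_devloss(lld_rport->nvme_rport, secs);
	}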
Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e37c69f7921d..25479d3031fa 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -45,6 +45,8 @@ enum nvme_fc_queue_flags { #define NVMEFC_QUEUE_DELAY 3 /* ms units */ +#define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */ + struct nvme_fc_queue { struct nvme_fc_ctrl *ctrl; struct device *dev; @@ -585,6 +587,11 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, newrec->remoteport.port_id = pinfo->port_id; newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; newrec->remoteport.port_num = idx; + /* a registration value of dev_loss_tmo=0 results in the default */ + if (pinfo->dev_loss_tmo) + newrec->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo; + else + newrec->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO; spin_lock_irqsave(&nvme_fc_lock, flags); list_add_tail(&newrec->endp_list, &lport->endp_list); @@ -688,6 +695,30 @@ nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport) } EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport); +int +nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr, + u32 dev_loss_tmo) +{ + struct nvme_fc_rport *rport = remoteport_to_rport(portptr); + struct nvme_fc_ctrl *ctrl; + unsigned long flags; + + spin_lock_irqsave(&rport->lock, flags); + + if (portptr->port_state != FC_OBJSTATE_ONLINE) { + spin_unlock_irqrestore(&rport->lock, flags); + return -EINVAL; + } + + /* a dev_loss_tmo of 0 (immediate) is allowed to be set */ + rport->remoteport.dev_loss_tmo = dev_loss_tmo; + + spin_unlock_irqrestore(&rport->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(nvme_fc_set_remoteport_devloss); + /* *********************** FC-NVME DMA Handling **************************** */ -- cgit From 96e24801056467c6483f68ef71d642fb925c3100 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 25 Oct 2017 16:43:16 -0700 Subject: nvme-fc: check connectivity before initiating reconnects Check remoteport connectivity before initiating reconnects Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 25479d3031fa..b64cc1002452 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -808,7 +808,6 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, dma_unmap_sg(dev, sg, nents, dir); } - /* *********************** FC-NVME LS Handling **************************** */ static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *); @@ -2462,6 +2461,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ++ctrl->ctrl.nr_reconnects; + if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) + return -ENODEV; + /* * Create the admin queue */ @@ -2682,6 +2684,10 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) ctrl->cnum, status); if (nvmf_should_reconnect(&ctrl->ctrl)) { + /* Only schedule the reconnect if the remote port is online */ + if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) + return; + dev_info(ctrl->ctrl.device, "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", ctrl->cnum, 
ctrl->ctrl.opts->reconnect_delay); @@ -2715,12 +2721,15 @@ nvme_fc_reset_ctrl_work(struct work_struct *work) return; } - ret = nvme_fc_create_association(ctrl); - if (ret) - nvme_fc_reconnect_or_delete(ctrl, ret); - else - dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); + if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) { + ret = nvme_fc_create_association(ctrl); + if (ret) + nvme_fc_reconnect_or_delete(ctrl, ret); + else + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: controller reset complete\n", + ctrl->cnum); + } } static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { -- cgit From 3cec7f9de448131778a1428100621fd371db75f4 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 25 Oct 2017 16:43:13 -0700 Subject: nvme: allow controller RESETTING to RECONNECTING transition Transport will typically transition from LIVE to RESETTING when initially performing a reset or recovering from an error. Adding this transition allows a transport to transition to RECONNECTING when it checks/waits for connectivity, then creates new transport connections and reinits the controller. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 003314eb6341..07b9f4e1c283 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -241,6 +241,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, case NVME_CTRL_RECONNECTING: switch (old_state) { case NVME_CTRL_LIVE: + case NVME_CTRL_RESETTING: changed = true; /* FALLTHRU */ default: -- cgit From 2b632970da4f288e6dbdc826c34fbf74f40ec94c Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 25 Oct 2017 16:43:17 -0700 Subject: nvme-fc: add dev_loss_tmo timeout and remoteport resume support When a remoteport is unregistered (connectivity lost), the following actions are taken: - the remoteport is marked DELETED - the time when dev_loss_tmo would expire is set in the remoteport - all controllers on the remoteport are reset. After a controller resets, it will stall in a RECONNECTING state waiting for one of the following: - the controller will continue to attempt reconnect per max_retries and reconnect_delay. As there is no remoteport connectivity, the reconnect attempt will immediately fail. If max reconnects has not been reached, a new reconnect_delay timer will be scheduled. If the current time plus another reconnect_delay exceeds when dev_loss_tmo expires on the remote port, then the reconnect_delay will be shortened to schedule no later than when dev_loss_tmo expires. If max reconnect attempts are reached (e.g. ctrl_loss_tmo reached) or dev_loss_tmo is exceeded without connectivity, the controller is deleted. - the remoteport is re-registered prior to dev_loss_tmo expiring. The resume of the remoteport will immediately attempt to reconnect each of its suspended controllers.
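To make the delay clamping above concrete, here is a minimal sketch of the arithmetic (an illustrative helper under assumed names, not code from the patch; the real logic lives in nvme_fc_reconnect_or_delete() in the diff below): the next reconnect fires reconnect_delay from now, shortened so that it lands no later than the moment dev_loss_tmo expires.

#include <linux/jiffies.h>

/*
 * Sketch only: clamp a reconnect delay (in jiffies) so the attempt is
 * scheduled no later than dev_loss_end, the jiffies value at which
 * dev_loss_tmo expires on the remoteport.
 */
static unsigned long example_clamp_reconnect_delay(unsigned long recon_delay,
						   unsigned long dev_loss_end)
{
	if (time_after(jiffies + recon_delay, dev_loss_end))
		recon_delay = dev_loss_end - jiffies;

	return recon_delay;
}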
Signed-off-by: James Smart Reviewed-by: Hannes Reinecke [hch: updated to use nvme_delete_ctrl] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 278 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 239 insertions(+), 39 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b64cc1002452..113c30be7276 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -138,6 +138,7 @@ struct nvme_fc_rport { struct nvme_fc_lport *lport; spinlock_t lock; struct kref ref; + unsigned long dev_loss_end; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ enum nvme_fcctrl_flags { @@ -528,6 +529,102 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport) return kref_get_unless_zero(&rport->ref); } +static void +nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl) +{ + switch (ctrl->ctrl.state) { + case NVME_CTRL_NEW: + case NVME_CTRL_RECONNECTING: + /* + * As all reconnects were suppressed, schedule a + * connect. + */ + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: connectivity re-established. " + "Attempting reconnect\n", ctrl->cnum); + + queue_delayed_work(nvme_wq, &ctrl->connect_work, 0); + break; + + case NVME_CTRL_RESETTING: + /* + * Controller is already in the process of terminating the + * association. No need to do anything further. The reconnect + * step will naturally occur after the reset completes. + */ + break; + + default: + /* no action to take - let it delete */ + break; + } +} + +static struct nvme_fc_rport * +nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport, + struct nvme_fc_port_info *pinfo) +{ + struct nvme_fc_rport *rport; + struct nvme_fc_ctrl *ctrl; + unsigned long flags; + + spin_lock_irqsave(&nvme_fc_lock, flags); + + list_for_each_entry(rport, &lport->endp_list, endp_list) { + if (rport->remoteport.node_name != pinfo->node_name || + rport->remoteport.port_name != pinfo->port_name) + continue; + + if (!nvme_fc_rport_get(rport)) { + rport = ERR_PTR(-ENOLCK); + goto out_done; + } + + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + spin_lock_irqsave(&rport->lock, flags); + + /* has it been unregistered */ + if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) { + /* means lldd called us twice */ + spin_unlock_irqrestore(&rport->lock, flags); + nvme_fc_rport_put(rport); + return ERR_PTR(-ESTALE); + } + + rport->remoteport.port_state = FC_OBJSTATE_ONLINE; + rport->dev_loss_end = 0; + + /* + * kick off a reconnect attempt on all associations to the + * remote port. A successful reconnect will resume i/o.
+ */ + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) + nvme_fc_resume_controller(ctrl); + + spin_unlock_irqrestore(&rport->lock, flags); + + return rport; + } + + rport = NULL; + +out_done: + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return rport; +} + +static inline void +__nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport, + struct nvme_fc_port_info *pinfo) +{ + if (pinfo->dev_loss_tmo) + rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo; + else + rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO; +} + /** * nvme_fc_register_remoteport - transport entry point called by an * LLDD to register the existence of a NVME @@ -554,22 +651,45 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, unsigned long flags; int ret, idx; + if (!nvme_fc_lport_get(lport)) { + ret = -ESHUTDOWN; + goto out_reghost_failed; + } + + /* + * look to see if there is already a remoteport that is waiting + * for a reconnect (within dev_loss_tmo) with the same WWNs. + * If so, transition to it and reconnect. + */ + newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo); + + /* found an rport, but something about its state is bad */ + if (IS_ERR(newrec)) { + ret = PTR_ERR(newrec); + goto out_lport_put; + + /* found existing rport, which was resumed */ + } else if (newrec) { + nvme_fc_lport_put(lport); + __nvme_fc_set_dev_loss_tmo(newrec, pinfo); + nvme_fc_signal_discovery_scan(lport, newrec); + *portptr = &newrec->remoteport; + return 0; + } + + /* nothing found - allocate a new remoteport struct */ + newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz), GFP_KERNEL); if (!newrec) { ret = -ENOMEM; - goto out_reghost_failed; - } - - if (!nvme_fc_lport_get(lport)) { - ret = -ESHUTDOWN; - goto out_kfree_rport; + goto out_lport_put; } idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; - goto out_lport_put; + goto out_kfree_rport; } INIT_LIST_HEAD(&newrec->endp_list); @@ -587,11 +707,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, newrec->remoteport.port_id = pinfo->port_id; newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; newrec->remoteport.port_num = idx; - /* a registration value of dev_loss_tmo=0 results in the default */ - if (pinfo->dev_loss_tmo) - newrec->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo; - else - newrec->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO; + __nvme_fc_set_dev_loss_tmo(newrec, pinfo); spin_lock_irqsave(&nvme_fc_lock, flags); list_add_tail(&newrec->endp_list, &lport->endp_list); @@ -602,10 +718,10 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, *portptr = &newrec->remoteport; return 0; -out_lport_put: - nvme_fc_lport_put(lport); out_kfree_rport: kfree(newrec); +out_lport_put: + nvme_fc_lport_put(lport); out_reghost_failed: *portptr = NULL; return ret; @@ -636,6 +752,58 @@ restart: return 0; } +static void +nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) +{ + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: controller connectivity lost. Awaiting " + "Reconnect", ctrl->cnum); + + switch (ctrl->ctrl.state) { + case NVME_CTRL_NEW: + case NVME_CTRL_LIVE: + /* + * Schedule a controller reset. The reset will terminate the + * association and schedule the reconnect timer. Reconnects + * will be attempted until either the ctlr_loss_tmo + * (max_retries * connect_delay) expires or the remoteport's + * dev_loss_tmo expires. + */ + if (nvme_reset_ctrl(&ctrl->ctrl)) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Couldn't schedule reset. 
" + "Deleting controller.\n", + ctrl->cnum); + nvme_delete_ctrl(&ctrl->ctrl); + } + break; + + case NVME_CTRL_RECONNECTING: + /* + * The association has already been terminated and the + * controller is attempting reconnects. No need to do anything + * futher. Reconnects will be attempted until either the + * ctlr_loss_tmo (max_retries * connect_delay) expires or the + * remoteport's dev_loss_tmo expires. + */ + break; + + case NVME_CTRL_RESETTING: + /* + * Controller is already in the process of terminating the + * association. No need to do anything further. The reconnect + * step will kick in naturally after the association is + * terminated. + */ + break; + + case NVME_CTRL_DELETING: + default: + /* no action to take - let it delete */ + break; + } +} + /** * nvme_fc_unregister_remoteport - transport entry point called by an * LLDD to deregister/remove a previously @@ -665,15 +833,31 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) } portptr->port_state = FC_OBJSTATE_DELETED; - /* tear down all associations to the remote port */ - list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) - nvme_delete_ctrl(&ctrl->ctrl); + rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ); + + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { + /* if dev_loss_tmo==0, dev loss is immediate */ + if (!portptr->dev_loss_tmo) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: controller connectivity lost. " + "Deleting controller.\n", + ctrl->cnum); + nvme_delete_ctrl(&ctrl->ctrl); + } else + nvme_fc_ctrl_connectivity_loss(ctrl); + } spin_unlock_irqrestore(&rport->lock, flags); nvme_fc_abort_lsops(rport); + /* + * release the reference, which will allow, if all controllers + * go away, which should only occur after dev_loss_tmo occurs, + * for the rport to be torn down. 
+ */ nvme_fc_rport_put(rport); + return 0; } EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport); @@ -700,7 +884,6 @@ nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr, u32 dev_loss_tmo) { struct nvme_fc_rport *rport = remoteport_to_rport(portptr); - struct nvme_fc_ctrl *ctrl; unsigned long flags; spin_lock_irqsave(&rport->lock, flags); @@ -2676,28 +2859,43 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) static void nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) { + struct nvme_fc_rport *rport = ctrl->rport; + struct nvme_fc_remote_port *portptr = &rport->remoteport; + unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ; + bool recon = true; + if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) return; - dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", - ctrl->cnum, status); + if (portptr->port_state == FC_OBJSTATE_ONLINE) + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", + ctrl->cnum, status); + else if (time_after_eq(jiffies, rport->dev_loss_end)) + recon = false; - if (nvmf_should_reconnect(&ctrl->ctrl)) { - /* Only schedule the reconnect if the remote port is online */ - if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) - return; + if (recon && nvmf_should_reconnect(&ctrl->ctrl)) { + if (portptr->port_state == FC_OBJSTATE_ONLINE) + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %ld " + "seconds\n", + ctrl->cnum, recon_delay / HZ); + else if (time_after(jiffies + recon_delay, rport->dev_loss_end)) + recon_delay = rport->dev_loss_end - jiffies; - dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", - ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); - queue_delayed_work(nvme_wq, &ctrl->connect_work, - ctrl->ctrl.opts->reconnect_delay * HZ); + queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay); } else { - dev_warn(ctrl->ctrl.device, + if (portptr->port_state == FC_OBJSTATE_ONLINE) + dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: Max reconnect attempts (%d) " "reached. Removing controller\n", ctrl->cnum, ctrl->ctrl.nr_reconnects); + else + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: dev_loss_tmo (%d) expired " + "while waiting for remoteport connectivity. " + "Removing controller\n", ctrl->cnum, + portptr->dev_loss_tmo); WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); } } @@ -2721,15 +2919,17 @@ nvme_fc_reset_ctrl_work(struct work_struct *work) return; } - if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) { + if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) ret = nvme_fc_create_association(ctrl); - if (ret) - nvme_fc_reconnect_or_delete(ctrl, ret); - else - dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: controller reset complete\n", - ctrl->cnum); - } + else + ret = -ENOTCONN; + + if (ret) + nvme_fc_reconnect_or_delete(ctrl, ret); + else + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: controller reset complete\n", + ctrl->cnum); } static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { -- cgit From a96d4bd867129e6124e3cd1006fcbcfea3341179 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 27 Oct 2017 13:12:49 -0700 Subject: nvmet: fix fatal_err_work deadlock Below is a stack trace for an issue that was reported. What's happening is that the nvmet layer had its controller kato timeout fire, which causes it to schedule its fatal error handler via the fatal_err_work element.
The error handler is invoked, which calls the transport delete_ctrl() entry point, and as the transport tears down the controller, nvmet_sq_destroy ends up doing the final put on the ctlr, causing it to enter its free routine. The ctlr free routine does a cancel_work_sync() on the fatal_err_work element, which then does a flush_work and wait_for_completion. But, as the wait is in the context of the work element being flushed, it's in a catch-22 and the thread hangs. [ 326.903131] nvmet: ctrl 1 keep-alive timer (15 seconds) expired! [ 326.909832] nvmet: ctrl 1 fatal error occurred! [ 327.643100] lpfc 0000:04:00.0: 0:6313 NVMET Defer ctx release xri x114 flg x2 [ 494.582064] INFO: task kworker/0:2:243 blocked for more than 120 seconds. [ 494.589638] Not tainted 4.14.0-rc1.James+ #1 [ 494.594986] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 494.603718] kworker/0:2 D 0 243 2 0x80000000 [ 494.609839] Workqueue: events nvmet_fatal_error_handler [nvmet] [ 494.616447] Call Trace: [ 494.619177] __schedule+0x28d/0x890 [ 494.623070] schedule+0x36/0x80 [ 494.626571] schedule_timeout+0x1dd/0x300 [ 494.631044] ? dequeue_task_fair+0x592/0x840 [ 494.635810] ? pick_next_task_fair+0x23b/0x5c0 [ 494.640756] wait_for_completion+0x121/0x180 [ 494.645521] ? wake_up_q+0x80/0x80 [ 494.649315] flush_work+0x11d/0x1a0 [ 494.653206] ? wake_up_worker+0x30/0x30 [ 494.657484] __cancel_work_timer+0x10b/0x190 [ 494.662249] cancel_work_sync+0x10/0x20 [ 494.666525] nvmet_ctrl_put+0xa3/0x100 [nvmet] [ 494.671482] nvmet_sq_destroy+0x64/0xd0 [nvmet] [ 494.676540] nvmet_fc_delete_target_queue+0x202/0x220 [nvmet_fc] [ 494.683245] nvmet_fc_delete_target_assoc+0x6d/0xc0 [nvmet_fc] [ 494.689743] nvmet_fc_delete_ctrl+0x137/0x1a0 [nvmet_fc] [ 494.695673] nvmet_fatal_error_handler+0x30/0x40 [nvmet] [ 494.701589] process_one_work+0x149/0x360 [ 494.706064] worker_thread+0x4d/0x3c0 [ 494.710148] kthread+0x109/0x140 [ 494.713751] ? rescuer_thread+0x380/0x380 [ 494.718214] ? kthread_park+0x60/0x60 Correct this by having the fc transport convert to a different workqueue context for the actual controller teardown, which may call cancel_work_sync().
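The shape of the fix is a recurring kernel pattern: a work handler must never end up in a path that does flush_work()/cancel_work_sync() on itself, so the final teardown is deferred to a second work item. A generic sketch of that pattern follows (the example_* names are illustrative, not the nvmet-fc code, which appears in the diff below):

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct example_assoc {
	struct kref ref;
	struct work_struct del_work;	/* INIT_WORK()ed at allocation time */
};

static void example_free(struct kref *ref)
{
	kfree(container_of(ref, struct example_assoc, ref));
}

static void example_delete_assoc(struct work_struct *work)
{
	struct example_assoc *assoc =
		container_of(work, struct example_assoc, del_work);

	/*
	 * This runs in its own work context. If the final put below ends
	 * up in cancel_work_sync() on another work item, it no longer
	 * waits on the very context it is executing in.
	 */
	kref_put(&assoc->ref, example_free);
}

static void example_fatal_error_path(struct example_assoc *assoc)
{
	/* defer the teardown instead of running it inline */
	schedule_work(&assoc->del_work);
}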
Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 58e010bdda3e..5f2570f5dfa9 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -150,6 +150,7 @@ struct nvmet_fc_tgt_assoc { struct list_head a_list; struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; + struct work_struct del_work; }; @@ -232,6 +233,7 @@ static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport); static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod); +static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc); /* *********************** FC-NVME DMA Handling **************************** */ @@ -802,6 +804,16 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, return NULL; } +static void +nvmet_fc_delete_assoc(struct work_struct *work) +{ + struct nvmet_fc_tgt_assoc *assoc = + container_of(work, struct nvmet_fc_tgt_assoc, del_work); + + nvmet_fc_delete_target_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); +} + static struct nvmet_fc_tgt_assoc * nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) { @@ -826,6 +838,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); + INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); while (needrandom) { get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID); @@ -1118,8 +1131,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - nvmet_fc_delete_target_assoc(assoc); - nvmet_fc_tgt_a_put(assoc); + schedule_work(&assoc->del_work); return; } -- cgit From 3639efef8fb128f99ae38bf603189639efc3850d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 20 Oct 2017 16:18:03 -0600 Subject: nvme: Remove unused headers Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 7735571ffc9a..32c413ec818c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -13,7 +13,6 @@ */ #include -#include #include #include #include @@ -26,12 +25,9 @@ #include #include #include -#include #include -#include #include #include -#include #include #include "nvme.h" -- cgit From a806c6c81e6c0d07c8a8b2643bad4a37a196d681 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Thu, 2 Nov 2017 18:07:44 +0900 Subject: nvme: comment typo fixed in clearing AER a tiny typo fixed in a comment. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 07b9f4e1c283..64744355aa88 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2637,7 +2637,7 @@ static void nvme_fw_act_work(struct work_struct *work) return; nvme_start_queues(ctrl); - /* read FW slot informationi to clear the AER*/ + /* read FW slot information to clear the AER */ nvme_get_fw_slot_info(ctrl); } -- cgit