diff options
Diffstat (limited to 'drivers/nvme/host/fc.c')
| -rw-r--r-- | drivers/nvme/host/fc.c | 2710 |
1 files changed, 1864 insertions, 846 deletions
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5c2a08ef08ba..bc455fa98246 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1,18 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2016 Avago Technologies. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful. - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, - * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A - * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO - * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID. - * See the GNU General Public License for more details, a copy of which - * can be found in the file COPYING included with this package - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> @@ -20,37 +8,34 @@ #include <uapi/scsi/fc/fc_fs.h> #include <uapi/scsi/fc/fc_els.h> #include <linux/delay.h> - +#include <linux/overflow.h> +#include <linux/blk-cgroup.h> #include "nvme.h" #include "fabrics.h" #include <linux/nvme-fc-driver.h> #include <linux/nvme-fc.h> - +#include "fc.h" +#include <scsi/scsi_transport_fc.h> /* *************************** Data Structures/Defines ****************** */ -/* - * We handle AEN commands ourselves and don't even let the - * block layer know about them. - */ -#define NVME_FC_NR_AEN_COMMANDS 1 -#define NVME_FC_AQ_BLKMQ_DEPTH \ - (NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS) -#define AEN_CMDID_BASE (NVME_FC_AQ_BLKMQ_DEPTH + 1) - enum nvme_fc_queue_flags { - NVME_FC_Q_CONNECTED = (1 << 0), + NVME_FC_Q_CONNECTED = 0, + NVME_FC_Q_LIVE, }; -#define NVMEFC_QUEUE_DELAY 3 /* ms units */ +#define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */ +#define NVME_FC_DEFAULT_RECONNECT_TMO 2 /* delay between reconnects + * when connected and a + * connection failure. + */ struct nvme_fc_queue { struct nvme_fc_ctrl *ctrl; struct device *dev; struct blk_mq_hw_ctx *hctx; void *lldd_handle; - int queue_size; size_t cmnd_capsule_len; u32 qnum; u32 rqcnt; @@ -64,9 +49,7 @@ struct nvme_fc_queue { enum nvme_fcop_flags { FCOP_FLAGS_TERMIO = (1 << 0), - FCOP_FLAGS_RELEASED = (1 << 1), - FCOP_FLAGS_COMPLETE = (1 << 2), - FCOP_FLAGS_AEN = (1 << 3), + FCOP_FLAGS_AEN = (1 << 1), }; struct nvmefc_ls_req_op { @@ -83,6 +66,17 @@ struct nvmefc_ls_req_op { bool req_queued; }; +struct nvmefc_ls_rcv_op { + struct nvme_fc_rport *rport; + struct nvmefc_ls_rsp *lsrsp; + union nvmefc_ls_requests *rqstbuf; + union nvmefc_ls_responses *rspbuf; + u16 rqstdatalen; + bool handled; + dma_addr_t rspdma; + struct list_head lsrcv_list; /* rport->ls_rcv_list */ +} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ + enum nvme_fcpop_state { FCPOP_STATE_UNINIT = 0, FCPOP_STATE_IDLE = 1, @@ -115,6 +109,12 @@ struct nvme_fc_fcp_op { struct nvme_fc_ersp_iu rsp_iu; }; +struct nvme_fcp_op_w_sgl { + struct nvme_fc_fcp_op op; + struct scatterlist sgl[NVME_INLINE_SG_CNT]; + uint8_t priv[]; +}; + struct nvme_fc_lport { struct nvme_fc_local_port localport; @@ -124,6 +124,7 @@ struct nvme_fc_lport { struct device *dev; /* physical device for dma */ struct nvme_fc_port_template *ops; struct kref ref; + atomic_t act_rport_cnt; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ struct nvme_fc_rport { @@ -132,15 +133,21 @@ struct nvme_fc_rport { struct list_head endp_list; /* for lport->endp_list */ struct list_head ctrl_list; struct list_head ls_req_list; + struct list_head ls_rcv_list; + struct list_head disc_list; struct device *dev; /* physical device for dma */ struct nvme_fc_lport *lport; spinlock_t lock; struct kref ref; + atomic_t act_ctrl_cnt; + unsigned long dev_loss_end; + struct work_struct lsrcv_work; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ -enum nvme_fcctrl_flags { - FCCTRL_TERMIO = (1 << 0), -}; +/* fc_ctrl flags values - specified as bit positions */ +#define ASSOC_ACTIVE 0 +#define ASSOC_FAILED 1 +#define FCCTRL_TERMIO 2 struct nvme_fc_ctrl { spinlock_t lock; @@ -150,22 +157,24 @@ struct nvme_fc_ctrl { struct nvme_fc_rport *rport; u32 cnum; + bool ioq_live; u64 association_id; + struct nvmefc_ls_rcv_op *rcv_disconn; struct list_head ctrl_list; /* rport->ctrl_list */ struct blk_mq_tag_set admin_tag_set; struct blk_mq_tag_set tag_set; - struct work_struct delete_work; + struct work_struct ioerr_work; struct delayed_work connect_work; struct kref ref; - u32 flags; + unsigned long flags; u32 iocnt; wait_queue_head_t ioabort_wait; - struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS]; + struct nvme_fc_fcp_op aen_ops[NVME_NR_AEN_COMMANDS]; struct nvme_ctrl ctrl; }; @@ -211,15 +220,111 @@ static LIST_HEAD(nvme_fc_lport_list); static DEFINE_IDA(nvme_fc_local_port_cnt); static DEFINE_IDA(nvme_fc_ctrl_cnt); +/* + * These items are short-term. They will eventually be moved into + * a generic FC class. See comments in module init. + */ +static struct device *fc_udev_device; - +static void nvme_fc_complete_rq(struct request *rq); /* *********************** FC-NVME Port Management ************************ */ -static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *); static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, struct nvme_fc_queue *, unsigned int); +static void nvme_fc_handle_ls_rqst_work(struct work_struct *work); + + +static void +nvme_fc_free_lport(struct kref *ref) +{ + struct nvme_fc_lport *lport = + container_of(ref, struct nvme_fc_lport, ref); + unsigned long flags; + + WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); + WARN_ON(!list_empty(&lport->endp_list)); + + /* remove from transport list */ + spin_lock_irqsave(&nvme_fc_lock, flags); + list_del(&lport->port_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num); + ida_destroy(&lport->endp_cnt); + + put_device(lport->dev); + + kfree(lport); +} + +static void +nvme_fc_lport_put(struct nvme_fc_lport *lport) +{ + kref_put(&lport->ref, nvme_fc_free_lport); +} + +static int +nvme_fc_lport_get(struct nvme_fc_lport *lport) +{ + return kref_get_unless_zero(&lport->ref); +} + + +static struct nvme_fc_lport * +nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo, + struct nvme_fc_port_template *ops, + struct device *dev) +{ + struct nvme_fc_lport *lport; + unsigned long flags; + + spin_lock_irqsave(&nvme_fc_lock, flags); + + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { + if (lport->localport.node_name != pinfo->node_name || + lport->localport.port_name != pinfo->port_name) + continue; + + if (lport->dev != dev) { + lport = ERR_PTR(-EXDEV); + goto out_done; + } + + if (lport->localport.port_state != FC_OBJSTATE_DELETED) { + lport = ERR_PTR(-EEXIST); + goto out_done; + } + + if (!nvme_fc_lport_get(lport)) { + /* + * fails if ref cnt already 0. If so, + * act as if lport already deleted + */ + lport = NULL; + goto out_done; + } + + /* resume the lport */ + + lport->ops = ops; + lport->localport.port_role = pinfo->port_role; + lport->localport.port_id = pinfo->port_id; + lport->localport.port_state = FC_OBJSTATE_ONLINE; + + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return lport; + } + + lport = NULL; + +out_done: + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return lport; +} /** * nvme_fc_register_localport - transport entry point called by an @@ -229,7 +334,7 @@ static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, * @template: LLDD entrypoints and operational parameters for the port * @dev: physical hardware device node port corresponds to. Will be * used for DMA mappings - * @lport_p: pointer to a local port pointer. Upon success, the routine + * @portptr: pointer to a local port pointer. Upon success, the routine * will allocate a nvme_fc_local_port structure and place its * address in the local port pointer. Upon failure, local port * pointer will be set to 0. @@ -257,6 +362,28 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, goto out_reghost_failed; } + /* + * look to see if there is already a localport that had been + * deregistered and in the process of waiting for all the + * references to fully be removed. If the references haven't + * expired, we can simply re-enable the localport. Remoteports + * and controller reconnections should resume naturally. + */ + newrec = nvme_fc_attach_to_unreg_lport(pinfo, template, dev); + + /* found an lport, but something about its state is bad */ + if (IS_ERR(newrec)) { + ret = PTR_ERR(newrec); + goto out_reghost_failed; + + /* found existing lport, which was resumed */ + } else if (newrec) { + *portptr = &newrec->localport; + return 0; + } + + /* nothing found - allocate a new localport struct */ + newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz), GFP_KERNEL); if (!newrec) { @@ -264,7 +391,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, goto out_reghost_failed; } - idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL); + idx = ida_alloc(&nvme_fc_local_port_cnt, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; goto out_fail_kfree; @@ -278,10 +405,14 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, INIT_LIST_HEAD(&newrec->port_list); INIT_LIST_HEAD(&newrec->endp_list); kref_init(&newrec->ref); + atomic_set(&newrec->act_rport_cnt, 0); newrec->ops = template; newrec->dev = dev; ida_init(&newrec->endp_cnt); - newrec->localport.private = &newrec[1]; + if (template->local_priv_sz) + newrec->localport.private = &newrec[1]; + else + newrec->localport.private = NULL; newrec->localport.node_name = pinfo->node_name; newrec->localport.port_name = pinfo->port_name; newrec->localport.port_role = pinfo->port_role; @@ -300,7 +431,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, return 0; out_ida_put: - ida_simple_remove(&nvme_fc_local_port_cnt, idx); + ida_free(&nvme_fc_local_port_cnt, idx); out_fail_kfree: kfree(newrec); out_reghost_failed: @@ -310,50 +441,11 @@ out_reghost_failed: } EXPORT_SYMBOL_GPL(nvme_fc_register_localport); -static void -nvme_fc_free_lport(struct kref *ref) -{ - struct nvme_fc_lport *lport = - container_of(ref, struct nvme_fc_lport, ref); - unsigned long flags; - - WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); - WARN_ON(!list_empty(&lport->endp_list)); - - /* remove from transport list */ - spin_lock_irqsave(&nvme_fc_lock, flags); - list_del(&lport->port_list); - spin_unlock_irqrestore(&nvme_fc_lock, flags); - - /* let the LLDD know we've finished tearing it down */ - lport->ops->localport_delete(&lport->localport); - - ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); - ida_destroy(&lport->endp_cnt); - - put_device(lport->dev); - - kfree(lport); -} - -static void -nvme_fc_lport_put(struct nvme_fc_lport *lport) -{ - kref_put(&lport->ref, nvme_fc_free_lport); -} - -static int -nvme_fc_lport_get(struct nvme_fc_lport *lport) -{ - return kref_get_unless_zero(&lport->ref); -} - /** * nvme_fc_unregister_localport - transport entry point called by an * LLDD to deregister/remove a previously * registered a NVME host FC port. - * @localport: pointer to the (registered) local port that is to be - * deregistered. + * @portptr: pointer to the (registered) local port that is to be deregistered. * * Returns: * a completion status. Must be 0 upon success; a negative errno @@ -378,12 +470,182 @@ nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr) spin_unlock_irqrestore(&nvme_fc_lock, flags); + if (atomic_read(&lport->act_rport_cnt) == 0) + lport->ops->localport_delete(&lport->localport); + nvme_fc_lport_put(lport); return 0; } EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport); +/* + * TRADDR strings, per FC-NVME are fixed format: + * "nn-0x<16hexdigits>:pn-0x<16hexdigits>" - 43 characters + * udev event will only differ by prefix of what field is + * being specified: + * "NVMEFC_HOST_TRADDR=" or "NVMEFC_TRADDR=" - 19 max characters + * 19 + 43 + null_fudge = 64 characters + */ +#define FCNVME_TRADDR_LENGTH 64 + +static void +nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport, + struct nvme_fc_rport *rport) +{ + char hostaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_HOST_TRADDR=...*/ + char tgtaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_TRADDR=...*/ + char *envp[4] = { "FC_EVENT=nvmediscovery", hostaddr, tgtaddr, NULL }; + + if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY)) + return; + + snprintf(hostaddr, sizeof(hostaddr), + "NVMEFC_HOST_TRADDR=nn-0x%016llx:pn-0x%016llx", + lport->localport.node_name, lport->localport.port_name); + snprintf(tgtaddr, sizeof(tgtaddr), + "NVMEFC_TRADDR=nn-0x%016llx:pn-0x%016llx", + rport->remoteport.node_name, rport->remoteport.port_name); + kobject_uevent_env(&fc_udev_device->kobj, KOBJ_CHANGE, envp); +} + +static void +nvme_fc_free_rport(struct kref *ref) +{ + struct nvme_fc_rport *rport = + container_of(ref, struct nvme_fc_rport, ref); + struct nvme_fc_lport *lport = + localport_to_lport(rport->remoteport.localport); + unsigned long flags; + + WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED); + WARN_ON(!list_empty(&rport->ctrl_list)); + WARN_ON(!list_empty(&rport->ls_req_list)); + WARN_ON(!list_empty(&rport->ls_rcv_list)); + + /* remove from lport list */ + spin_lock_irqsave(&nvme_fc_lock, flags); + list_del(&rport->endp_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + WARN_ON(!list_empty(&rport->disc_list)); + ida_free(&lport->endp_cnt, rport->remoteport.port_num); + + kfree(rport); + + nvme_fc_lport_put(lport); +} + +static void +nvme_fc_rport_put(struct nvme_fc_rport *rport) +{ + kref_put(&rport->ref, nvme_fc_free_rport); +} + +static int +nvme_fc_rport_get(struct nvme_fc_rport *rport) +{ + return kref_get_unless_zero(&rport->ref); +} + +static void +nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl) +{ + switch (nvme_ctrl_state(&ctrl->ctrl)) { + case NVME_CTRL_NEW: + case NVME_CTRL_CONNECTING: + /* + * As all reconnects were suppressed, schedule a + * connect. + */ + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: connectivity re-established. " + "Attempting reconnect\n", ctrl->cnum); + + queue_delayed_work(nvme_wq, &ctrl->connect_work, 0); + break; + + case NVME_CTRL_RESETTING: + /* + * Controller is already in the process of terminating the + * association. No need to do anything further. The reconnect + * step will naturally occur after the reset completes. + */ + break; + + default: + /* no action to take - let it delete */ + break; + } +} + +static struct nvme_fc_rport * +nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport, + struct nvme_fc_port_info *pinfo) +{ + struct nvme_fc_rport *rport; + struct nvme_fc_ctrl *ctrl; + unsigned long flags; + + spin_lock_irqsave(&nvme_fc_lock, flags); + + list_for_each_entry(rport, &lport->endp_list, endp_list) { + if (rport->remoteport.node_name != pinfo->node_name || + rport->remoteport.port_name != pinfo->port_name) + continue; + + if (!nvme_fc_rport_get(rport)) { + rport = ERR_PTR(-ENOLCK); + goto out_done; + } + + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + spin_lock_irqsave(&rport->lock, flags); + + /* has it been unregistered */ + if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) { + /* means lldd called us twice */ + spin_unlock_irqrestore(&rport->lock, flags); + nvme_fc_rport_put(rport); + return ERR_PTR(-ESTALE); + } + + rport->remoteport.port_role = pinfo->port_role; + rport->remoteport.port_id = pinfo->port_id; + rport->remoteport.port_state = FC_OBJSTATE_ONLINE; + rport->dev_loss_end = 0; + + /* + * kick off a reconnect attempt on all associations to the + * remote port. A successful reconnects will resume i/o. + */ + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) + nvme_fc_resume_controller(ctrl); + + spin_unlock_irqrestore(&rport->lock, flags); + + return rport; + } + + rport = NULL; + +out_done: + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return rport; +} + +static inline void +__nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport, + struct nvme_fc_port_info *pinfo) +{ + if (pinfo->dev_loss_tmo) + rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo; + else + rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO; +} + /** * nvme_fc_register_remoteport - transport entry point called by an * LLDD to register the existence of a NVME @@ -391,7 +653,7 @@ EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport); * @localport: pointer to the (registered) local port that the remote * subsystem port is connected to. * @pinfo: pointer to information about the port to be registered - * @rport_p: pointer to a remote port pointer. Upon success, the routine + * @portptr: pointer to a remote port pointer. Upon success, the routine * will allocate a nvme_fc_remote_port structure and place its * address in the remote port pointer. Upon failure, remote port * pointer will be set to 0. @@ -410,96 +672,90 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, unsigned long flags; int ret, idx; + if (!nvme_fc_lport_get(lport)) { + ret = -ESHUTDOWN; + goto out_reghost_failed; + } + + /* + * look to see if there is already a remoteport that is waiting + * for a reconnect (within dev_loss_tmo) with the same WWN's. + * If so, transition to it and reconnect. + */ + newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo); + + /* found an rport, but something about its state is bad */ + if (IS_ERR(newrec)) { + ret = PTR_ERR(newrec); + goto out_lport_put; + + /* found existing rport, which was resumed */ + } else if (newrec) { + nvme_fc_lport_put(lport); + __nvme_fc_set_dev_loss_tmo(newrec, pinfo); + nvme_fc_signal_discovery_scan(lport, newrec); + *portptr = &newrec->remoteport; + return 0; + } + + /* nothing found - allocate a new remoteport struct */ + newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz), GFP_KERNEL); if (!newrec) { ret = -ENOMEM; - goto out_reghost_failed; - } - - if (!nvme_fc_lport_get(lport)) { - ret = -ESHUTDOWN; - goto out_kfree_rport; + goto out_lport_put; } - idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL); + idx = ida_alloc(&lport->endp_cnt, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; - goto out_lport_put; + goto out_kfree_rport; } INIT_LIST_HEAD(&newrec->endp_list); INIT_LIST_HEAD(&newrec->ctrl_list); INIT_LIST_HEAD(&newrec->ls_req_list); + INIT_LIST_HEAD(&newrec->disc_list); kref_init(&newrec->ref); + atomic_set(&newrec->act_ctrl_cnt, 0); spin_lock_init(&newrec->lock); newrec->remoteport.localport = &lport->localport; + INIT_LIST_HEAD(&newrec->ls_rcv_list); newrec->dev = lport->dev; newrec->lport = lport; - newrec->remoteport.private = &newrec[1]; + if (lport->ops->remote_priv_sz) + newrec->remoteport.private = &newrec[1]; + else + newrec->remoteport.private = NULL; newrec->remoteport.port_role = pinfo->port_role; newrec->remoteport.node_name = pinfo->node_name; newrec->remoteport.port_name = pinfo->port_name; newrec->remoteport.port_id = pinfo->port_id; newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; newrec->remoteport.port_num = idx; + __nvme_fc_set_dev_loss_tmo(newrec, pinfo); + INIT_WORK(&newrec->lsrcv_work, nvme_fc_handle_ls_rqst_work); spin_lock_irqsave(&nvme_fc_lock, flags); list_add_tail(&newrec->endp_list, &lport->endp_list); spin_unlock_irqrestore(&nvme_fc_lock, flags); + nvme_fc_signal_discovery_scan(lport, newrec); + *portptr = &newrec->remoteport; return 0; -out_lport_put: - nvme_fc_lport_put(lport); out_kfree_rport: kfree(newrec); +out_lport_put: + nvme_fc_lport_put(lport); out_reghost_failed: *portptr = NULL; return ret; } EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); -static void -nvme_fc_free_rport(struct kref *ref) -{ - struct nvme_fc_rport *rport = - container_of(ref, struct nvme_fc_rport, ref); - struct nvme_fc_lport *lport = - localport_to_lport(rport->remoteport.localport); - unsigned long flags; - - WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED); - WARN_ON(!list_empty(&rport->ctrl_list)); - - /* remove from lport list */ - spin_lock_irqsave(&nvme_fc_lock, flags); - list_del(&rport->endp_list); - spin_unlock_irqrestore(&nvme_fc_lock, flags); - - /* let the LLDD know we've finished tearing it down */ - lport->ops->remoteport_delete(&rport->remoteport); - - ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); - - kfree(rport); - - nvme_fc_lport_put(lport); -} - -static void -nvme_fc_rport_put(struct nvme_fc_rport *rport) -{ - kref_put(&rport->ref, nvme_fc_free_rport); -} - -static int -nvme_fc_rport_get(struct nvme_fc_rport *rport) -{ - return kref_get_unless_zero(&rport->ref); -} - static int nvme_fc_abort_lsops(struct nvme_fc_rport *rport) { @@ -524,12 +780,23 @@ restart: return 0; } +static void +nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) +{ + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: controller connectivity lost. Awaiting " + "Reconnect", ctrl->cnum); + + set_bit(ASSOC_FAILED, &ctrl->flags); + nvme_reset_ctrl(&ctrl->ctrl); +} + /** * nvme_fc_unregister_remoteport - transport entry point called by an * LLDD to deregister/remove a previously * registered a NVME subsystem FC port. - * @remoteport: pointer to the (registered) remote port that is to be - * deregistered. + * @portptr: pointer to the (registered) remote port that is to be + * deregistered. * * Returns: * a completion status. Must be 0 upon success; a negative errno @@ -553,19 +820,77 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) } portptr->port_state = FC_OBJSTATE_DELETED; - /* tear down all associations to the remote port */ - list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) - __nvme_fc_del_ctrl(ctrl); + rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ); + + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { + /* if dev_loss_tmo==0, dev loss is immediate */ + if (!portptr->dev_loss_tmo) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: controller connectivity lost.\n", + ctrl->cnum); + nvme_delete_ctrl(&ctrl->ctrl); + } else + nvme_fc_ctrl_connectivity_loss(ctrl); + } spin_unlock_irqrestore(&rport->lock, flags); nvme_fc_abort_lsops(rport); + if (atomic_read(&rport->act_ctrl_cnt) == 0) + rport->lport->ops->remoteport_delete(portptr); + + /* + * release the reference, which will allow, if all controllers + * go away, which should only occur after dev_loss_tmo occurs, + * for the rport to be torn down. + */ nvme_fc_rport_put(rport); + return 0; } EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport); +/** + * nvme_fc_rescan_remoteport - transport entry point called by an + * LLDD to request a nvme device rescan. + * @remoteport: pointer to the (registered) remote port that is to be + * rescanned. + * + * Returns: N/A + */ +void +nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport) +{ + struct nvme_fc_rport *rport = remoteport_to_rport(remoteport); + + nvme_fc_signal_discovery_scan(rport->lport, rport); +} +EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport); + +int +nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr, + u32 dev_loss_tmo) +{ + struct nvme_fc_rport *rport = remoteport_to_rport(portptr); + unsigned long flags; + + spin_lock_irqsave(&rport->lock, flags); + + if (portptr->port_state != FC_OBJSTATE_ONLINE) { + spin_unlock_irqrestore(&rport->lock, flags); + return -EINVAL; + } + + /* a dev_loss_tmo of 0 (immediate) is allowed to be set */ + rport->remoteport.dev_loss_tmo = dev_loss_tmo; + + spin_unlock_irqrestore(&rport->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(nvme_fc_set_remoteport_devloss); + /* *********************** FC-NVME DMA Handling **************************** */ @@ -576,7 +901,7 @@ EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport); * may crash. * * As such: - * Wrapper all the dma routines and check the dev pointer. + * Wrap all the dma routines and check the dev pointer. * * If simple mappings (return just a dma address, we'll noop them, * returning a dma address of 0. @@ -655,12 +980,12 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, dma_unmap_sg(dev, sg, nents, dir); } - /* *********************** FC-NVME LS Handling **************************** */ static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *); static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); +static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); static void __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) @@ -801,41 +1126,6 @@ nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, return __nvme_fc_send_ls_req(rport, lsop, done); } -/* Validation Error indexes into the string table below */ -enum { - VERR_NO_ERROR = 0, - VERR_LSACC = 1, - VERR_LSDESC_RQST = 2, - VERR_LSDESC_RQST_LEN = 3, - VERR_ASSOC_ID = 4, - VERR_ASSOC_ID_LEN = 5, - VERR_CONN_ID = 6, - VERR_CONN_ID_LEN = 7, - VERR_CR_ASSOC = 8, - VERR_CR_ASSOC_ACC_LEN = 9, - VERR_CR_CONN = 10, - VERR_CR_CONN_ACC_LEN = 11, - VERR_DISCONN = 12, - VERR_DISCONN_ACC_LEN = 13, -}; - -static char *validation_errors[] = { - "OK", - "Not LS_ACC", - "Not LSDESC_RQST", - "Bad LSDESC_RQST Length", - "Not Association ID", - "Bad Association ID Length", - "Not Connection ID", - "Bad Connection ID Length", - "Not CR_ASSOC Rqst", - "Bad CR_ASSOC ACC Length", - "Not CR_CONN Rqst", - "Bad CR_CONN ACC Length", - "Not Disconnect Rqst", - "Bad Disconnect ACC Length", -}; - static int nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio) @@ -844,21 +1134,27 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req *lsreq; struct fcnvme_ls_cr_assoc_rqst *assoc_rqst; struct fcnvme_ls_cr_assoc_acc *assoc_acc; + unsigned long flags; int ret, fcret = 0; lsop = kzalloc((sizeof(*lsop) + - ctrl->lport->ops->lsrqst_priv_sz + - sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL); + sizeof(*assoc_rqst) + sizeof(*assoc_acc) + + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); if (!lsop) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: send Create Association failed: ENOMEM\n", + ctrl->cnum); ret = -ENOMEM; goto out_no_memory; } - lsreq = &lsop->ls_req; - lsreq->private = (void *)&lsop[1]; - assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *) - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); + assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)&lsop[1]; assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1]; + lsreq = &lsop->ls_req; + if (ctrl->lport->ops->lsrqst_priv_sz) + lsreq->private = &assoc_acc[1]; + else + lsreq->private = NULL; assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION; assoc_rqst->desc_list_len = @@ -871,21 +1167,21 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); - assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize); + assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize - 1); /* Linux supports only Dynamic controllers */ assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff); uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id); - strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn, - min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE)); - strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn, - min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE)); + strscpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn, + sizeof(assoc_rqst->assoc_cmd.hostnqn)); + strscpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn, + sizeof(assoc_rqst->assoc_cmd.subnqn)); lsop->queue = queue; lsreq->rqstaddr = assoc_rqst; lsreq->rqstlen = sizeof(*assoc_rqst); lsreq->rspaddr = assoc_acc; lsreq->rsplen = sizeof(*assoc_acc); - lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; + lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) @@ -925,14 +1221,16 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, if (fcret) { ret = -EBADF; dev_err(ctrl->dev, - "q %d connect failed: %s\n", + "q %d Create Association LS failed: %s\n", queue->qnum, validation_errors[fcret]); } else { + spin_lock_irqsave(&ctrl->lock, flags); ctrl->association_id = be64_to_cpu(assoc_acc->associd.association_id); queue->connection_id = be64_to_cpu(assoc_acc->connectid.connection_id); set_bit(NVME_FC_Q_CONNECTED, &queue->flags); + spin_unlock_irqrestore(&ctrl->lock, flags); } out_free_buffer: @@ -956,18 +1254,23 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, int ret, fcret = 0; lsop = kzalloc((sizeof(*lsop) + - ctrl->lport->ops->lsrqst_priv_sz + - sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL); + sizeof(*conn_rqst) + sizeof(*conn_acc) + + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); if (!lsop) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: send Create Connection failed: ENOMEM\n", + ctrl->cnum); ret = -ENOMEM; goto out_no_memory; } - lsreq = &lsop->ls_req; - lsreq->private = (void *)&lsop[1]; - conn_rqst = (struct fcnvme_ls_cr_conn_rqst *) - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); + conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)&lsop[1]; conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1]; + lsreq = &lsop->ls_req; + if (ctrl->lport->ops->lsrqst_priv_sz) + lsreq->private = (void *)&conn_acc[1]; + else + lsreq->private = NULL; conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION; conn_rqst->desc_list_len = cpu_to_be32( @@ -986,14 +1289,14 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum); - conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize); + conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize - 1); lsop->queue = queue; lsreq->rqstaddr = conn_rqst; lsreq->rqstlen = sizeof(*conn_rqst); lsreq->rspaddr = conn_acc; lsreq->rsplen = sizeof(*conn_acc); - lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; + lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) @@ -1024,7 +1327,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, if (fcret) { ret = -EBADF; dev_err(ctrl->dev, - "q %d connect failed: %s\n", + "q %d Create I/O Connection LS failed: %s\n", queue->qnum, validation_errors[fcret]); } else { queue->connection_id = @@ -1037,7 +1340,7 @@ out_free_buffer: out_no_memory: if (ret) dev_err(ctrl->dev, - "queue %d connect command failed (%d).\n", + "queue %d connect I/O queue failed (%d).\n", queue->qnum, ret); return ret; } @@ -1049,7 +1352,7 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) __nvme_fc_finish_ls_req(lsop); - /* fc-nvme iniator doesn't care about success or failure of cmd */ + /* fc-nvme initiator doesn't care about success or failure of cmd */ kfree(lsop); } @@ -1062,7 +1365,7 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) * down, and the related FC-NVME Association ID and Connection IDs * become invalid. * - * The behavior of the fc-nvme initiator is such that it's + * The behavior of the fc-nvme initiator is such that its * understanding of the association and connections will implicitly * be torn down. The action is implicit as it may be due to a loss of * connectivity with the fc-nvme target, so you may never get a @@ -1074,82 +1377,414 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) static void nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) { - struct fcnvme_ls_disconnect_rqst *discon_rqst; - struct fcnvme_ls_disconnect_acc *discon_acc; + struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst; + struct fcnvme_ls_disconnect_assoc_acc *discon_acc; struct nvmefc_ls_req_op *lsop; struct nvmefc_ls_req *lsreq; int ret; lsop = kzalloc((sizeof(*lsop) + - ctrl->lport->ops->lsrqst_priv_sz + - sizeof(*discon_rqst) + sizeof(*discon_acc)), - GFP_KERNEL); - if (!lsop) - /* couldn't sent it... too bad */ + sizeof(*discon_rqst) + sizeof(*discon_acc) + + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); + if (!lsop) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: send Disconnect Association " + "failed: ENOMEM\n", + ctrl->cnum); return; + } + discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1]; + discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1]; lsreq = &lsop->ls_req; + if (ctrl->lport->ops->lsrqst_priv_sz) + lsreq->private = (void *)&discon_acc[1]; + else + lsreq->private = NULL; - lsreq->private = (void *)&lsop[1]; - discon_rqst = (struct fcnvme_ls_disconnect_rqst *) - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); - discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1]; + nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc, + ctrl->association_id); - discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT; - discon_rqst->desc_list_len = cpu_to_be32( - sizeof(struct fcnvme_lsdesc_assoc_id) + - sizeof(struct fcnvme_lsdesc_disconn_cmd)); + ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, + nvme_fc_disconnect_assoc_done); + if (ret) + kfree(lsop); +} - discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); - discon_rqst->associd.desc_len = - fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_assoc_id)); +static void +nvme_fc_xmt_ls_rsp_free(struct nvmefc_ls_rcv_op *lsop) +{ + struct nvme_fc_rport *rport = lsop->rport; + struct nvme_fc_lport *lport = rport->lport; + unsigned long flags; + + spin_lock_irqsave(&rport->lock, flags); + list_del(&lsop->lsrcv_list); + spin_unlock_irqrestore(&rport->lock, flags); + + fc_dma_sync_single_for_cpu(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); + fc_dma_unmap_single(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); + + kfree(lsop->rspbuf); + kfree(lsop->rqstbuf); + kfree(lsop); + + nvme_fc_rport_put(rport); +} + +static void +nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) +{ + struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private; + + nvme_fc_xmt_ls_rsp_free(lsop); +} + +static void +nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop) +{ + struct nvme_fc_rport *rport = lsop->rport; + struct nvme_fc_lport *lport = rport->lport; + struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0; + int ret; + + fc_dma_sync_single_for_device(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); + + ret = lport->ops->xmt_ls_rsp(&lport->localport, &rport->remoteport, + lsop->lsrsp); + if (ret) { + dev_warn(lport->dev, + "LLDD rejected LS RSP xmt: LS %d status %d\n", + w0->ls_cmd, ret); + nvme_fc_xmt_ls_rsp_free(lsop); + return; + } +} + +static struct nvme_fc_ctrl * +nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport, + struct nvmefc_ls_rcv_op *lsop) +{ + struct fcnvme_ls_disconnect_assoc_rqst *rqst = + &lsop->rqstbuf->rq_dis_assoc; + struct nvme_fc_ctrl *ctrl, *tmp, *ret = NULL; + struct nvmefc_ls_rcv_op *oldls = NULL; + u64 association_id = be64_to_cpu(rqst->associd.association_id); + unsigned long flags; - discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); + spin_lock_irqsave(&rport->lock, flags); - discon_rqst->discon_cmd.desc_tag = cpu_to_be32( - FCNVME_LSDESC_DISCONN_CMD); - discon_rqst->discon_cmd.desc_len = + list_for_each_entry_safe(ctrl, tmp, &rport->ctrl_list, ctrl_list) { + if (!nvme_fc_ctrl_get(ctrl)) + continue; + spin_lock(&ctrl->lock); + if (association_id == ctrl->association_id) { + oldls = ctrl->rcv_disconn; + ctrl->rcv_disconn = lsop; + ret = ctrl; + } + spin_unlock(&ctrl->lock); + if (ret) + /* leave the ctrl get reference */ + break; + spin_unlock_irqrestore(&rport->lock, flags); + nvme_fc_ctrl_put(ctrl); + spin_lock_irqsave(&rport->lock, flags); + } + + spin_unlock_irqrestore(&rport->lock, flags); + + /* transmit a response for anything that was pending */ + if (oldls) { + dev_info(rport->lport->dev, + "NVME-FC{%d}: Multiple Disconnect Association " + "LS's received\n", ctrl->cnum); + /* overwrite good response with bogus failure */ + oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf, + sizeof(*oldls->rspbuf), + rqst->w0.ls_cmd, + FCNVME_RJT_RC_UNAB, + FCNVME_RJT_EXP_NONE, 0); + nvme_fc_xmt_ls_rsp(oldls); + } + + return ret; +} + +/* + * returns true to mean LS handled and ls_rsp can be sent + * returns false to defer ls_rsp xmt (will be done as part of + * association termination) + */ +static bool +nvme_fc_ls_disconnect_assoc(struct nvmefc_ls_rcv_op *lsop) +{ + struct nvme_fc_rport *rport = lsop->rport; + struct fcnvme_ls_disconnect_assoc_rqst *rqst = + &lsop->rqstbuf->rq_dis_assoc; + struct fcnvme_ls_disconnect_assoc_acc *acc = + &lsop->rspbuf->rsp_dis_assoc; + struct nvme_fc_ctrl *ctrl = NULL; + int ret = 0; + + memset(acc, 0, sizeof(*acc)); + + ret = nvmefc_vldt_lsreq_discon_assoc(lsop->rqstdatalen, rqst); + if (!ret) { + /* match an active association */ + ctrl = nvme_fc_match_disconn_ls(rport, lsop); + if (!ctrl) + ret = VERR_NO_ASSOC; + } + + if (ret) { + dev_info(rport->lport->dev, + "Disconnect LS failed: %s\n", + validation_errors[ret]); + lsop->lsrsp->rsplen = nvme_fc_format_rjt(acc, + sizeof(*acc), rqst->w0.ls_cmd, + (ret == VERR_NO_ASSOC) ? + FCNVME_RJT_RC_INV_ASSOC : + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); + return true; + } + + /* format an ACCept response */ + + lsop->lsrsp->rsplen = sizeof(*acc); + + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_disconn_cmd)); - discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION; - discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id); + sizeof(struct fcnvme_ls_disconnect_assoc_acc)), + FCNVME_LS_DISCONNECT_ASSOC); + + /* + * the transmit of the response will occur after the exchanges + * for the association have been ABTS'd by + * nvme_fc_delete_association(). + */ - lsreq->rqstaddr = discon_rqst; - lsreq->rqstlen = sizeof(*discon_rqst); - lsreq->rspaddr = discon_acc; - lsreq->rsplen = sizeof(*discon_acc); - lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; + /* fail the association */ + nvme_fc_error_recovery(ctrl, "Disconnect Association LS received"); - ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, - nvme_fc_disconnect_assoc_done); - if (ret) - kfree(lsop); + /* release the reference taken by nvme_fc_match_disconn_ls() */ + nvme_fc_ctrl_put(ctrl); - /* only meaningful part to terminating the association */ - ctrl->association_id = 0; + return false; } +/* + * Actual Processing routine for received FC-NVME LS Requests from the LLD + * returns true if a response should be sent afterward, false if rsp will + * be sent asynchronously. + */ +static bool +nvme_fc_handle_ls_rqst(struct nvmefc_ls_rcv_op *lsop) +{ + struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0; + bool ret = true; -/* *********************** NVME Ctrl Routines **************************** */ + lsop->lsrsp->nvme_fc_private = lsop; + lsop->lsrsp->rspbuf = lsop->rspbuf; + lsop->lsrsp->rspdma = lsop->rspdma; + lsop->lsrsp->done = nvme_fc_xmt_ls_rsp_done; + /* Be preventative. handlers will later set to valid length */ + lsop->lsrsp->rsplen = 0; -static void __nvme_fc_final_op_cleanup(struct request *rq); -static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); + /* + * handlers: + * parse request input, execute the request, and format the + * LS response + */ + switch (w0->ls_cmd) { + case FCNVME_LS_DISCONNECT_ASSOC: + ret = nvme_fc_ls_disconnect_assoc(lsop); + break; + case FCNVME_LS_DISCONNECT_CONN: + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, + sizeof(*lsop->rspbuf), w0->ls_cmd, + FCNVME_RJT_RC_UNSUP, FCNVME_RJT_EXP_NONE, 0); + break; + case FCNVME_LS_CREATE_ASSOCIATION: + case FCNVME_LS_CREATE_CONNECTION: + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, + sizeof(*lsop->rspbuf), w0->ls_cmd, + FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); + break; + default: + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, + sizeof(*lsop->rspbuf), w0->ls_cmd, + FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); + break; + } -static int -nvme_fc_reinit_request(void *data, struct request *rq) + return(ret); +} + +static void +nvme_fc_handle_ls_rqst_work(struct work_struct *work) { - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); - struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; + struct nvme_fc_rport *rport = + container_of(work, struct nvme_fc_rport, lsrcv_work); + struct fcnvme_ls_rqst_w0 *w0; + struct nvmefc_ls_rcv_op *lsop; + unsigned long flags; + bool sendrsp; - memset(cmdiu, 0, sizeof(*cmdiu)); - cmdiu->scsi_id = NVME_CMD_SCSI_ID; - cmdiu->fc_id = NVME_CMD_FC_ID; - cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); - memset(&op->rsp_iu, 0, sizeof(op->rsp_iu)); +restart: + sendrsp = true; + spin_lock_irqsave(&rport->lock, flags); + list_for_each_entry(lsop, &rport->ls_rcv_list, lsrcv_list) { + if (lsop->handled) + continue; + + lsop->handled = true; + if (rport->remoteport.port_state == FC_OBJSTATE_ONLINE) { + spin_unlock_irqrestore(&rport->lock, flags); + sendrsp = nvme_fc_handle_ls_rqst(lsop); + } else { + spin_unlock_irqrestore(&rport->lock, flags); + w0 = &lsop->rqstbuf->w0; + lsop->lsrsp->rsplen = nvme_fc_format_rjt( + lsop->rspbuf, + sizeof(*lsop->rspbuf), + w0->ls_cmd, + FCNVME_RJT_RC_UNAB, + FCNVME_RJT_EXP_NONE, 0); + } + if (sendrsp) + nvme_fc_xmt_ls_rsp(lsop); + goto restart; + } + spin_unlock_irqrestore(&rport->lock, flags); +} + +static +void nvme_fc_rcv_ls_req_err_msg(struct nvme_fc_lport *lport, + struct fcnvme_ls_rqst_w0 *w0) +{ + dev_info(lport->dev, "RCV %s LS failed: No memory\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); +} + +/** + * nvme_fc_rcv_ls_req - transport entry point called by an LLDD + * upon the reception of a NVME LS request. + * + * The nvme-fc layer will copy payload to an internal structure for + * processing. As such, upon completion of the routine, the LLDD may + * immediately free/reuse the LS request buffer passed in the call. + * + * If this routine returns error, the LLDD should abort the exchange. + * + * @portptr: pointer to the (registered) remote port that the LS + * was received from. The remoteport is associated with + * a specific localport. + * @lsrsp: pointer to a nvmefc_ls_rsp response structure to be + * used to reference the exchange corresponding to the LS + * when issuing an ls response. + * @lsreqbuf: pointer to the buffer containing the LS Request + * @lsreqbuf_len: length, in bytes, of the received LS request + */ +int +nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr, + struct nvmefc_ls_rsp *lsrsp, + void *lsreqbuf, u32 lsreqbuf_len) +{ + struct nvme_fc_rport *rport = remoteport_to_rport(portptr); + struct nvme_fc_lport *lport = rport->lport; + struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf; + struct nvmefc_ls_rcv_op *lsop; + unsigned long flags; + int ret; + + nvme_fc_rport_get(rport); + + /* validate there's a routine to transmit a response */ + if (!lport->ops->xmt_ls_rsp) { + dev_info(lport->dev, + "RCV %s LS failed: no LLDD xmt_ls_rsp\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -EINVAL; + goto out_put; + } + + if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) { + dev_info(lport->dev, + "RCV %s LS failed: payload too large\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -E2BIG; + goto out_put; + } + + lsop = kzalloc(sizeof(*lsop), GFP_KERNEL); + if (!lsop) { + nvme_fc_rcv_ls_req_err_msg(lport, w0); + ret = -ENOMEM; + goto out_put; + } + + lsop->rqstbuf = kzalloc(sizeof(*lsop->rqstbuf), GFP_KERNEL); + lsop->rspbuf = kzalloc(sizeof(*lsop->rspbuf), GFP_KERNEL); + if (!lsop->rqstbuf || !lsop->rspbuf) { + nvme_fc_rcv_ls_req_err_msg(lport, w0); + ret = -ENOMEM; + goto out_free; + } + + lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf, + sizeof(*lsop->rspbuf), + DMA_TO_DEVICE); + if (fc_dma_mapping_error(lport->dev, lsop->rspdma)) { + dev_info(lport->dev, + "RCV %s LS failed: DMA mapping failure\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -EFAULT; + goto out_free; + } + + lsop->rport = rport; + lsop->lsrsp = lsrsp; + + memcpy(lsop->rqstbuf, lsreqbuf, lsreqbuf_len); + lsop->rqstdatalen = lsreqbuf_len; + + spin_lock_irqsave(&rport->lock, flags); + if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) { + spin_unlock_irqrestore(&rport->lock, flags); + ret = -ENOTCONN; + goto out_unmap; + } + list_add_tail(&lsop->lsrcv_list, &rport->ls_rcv_list); + spin_unlock_irqrestore(&rport->lock, flags); + + schedule_work(&rport->lsrcv_work); return 0; + +out_unmap: + fc_dma_unmap_single(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); +out_free: + kfree(lsop->rspbuf); + kfree(lsop->rqstbuf); + kfree(lsop); +out_put: + nvme_fc_rport_put(rport); + return ret; } +EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req); + + +/* *********************** NVME Ctrl Routines **************************** */ static void __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, @@ -1169,19 +1804,27 @@ nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq, { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); - return __nvme_fc_exit_request(set->driver_data, op); + return __nvme_fc_exit_request(to_fc_ctrl(set->driver_data), op); } static int __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) { - int state; + unsigned long flags; + int opstate; - state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); - if (state != FCPOP_STATE_ACTIVE) { - atomic_set(&op->state, state); - return -ECANCELED; + spin_lock_irqsave(&ctrl->lock, flags); + opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); + if (opstate != FCPOP_STATE_ACTIVE) + atomic_set(&op->state, opstate); + else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) { + op->flags |= FCOP_FLAGS_TERMIO; + ctrl->iocnt++; } + spin_unlock_irqrestore(&ctrl->lock, flags); + + if (opstate != FCPOP_STATE_ACTIVE) + return -ECANCELED; ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, &ctrl->rport->remoteport, @@ -1195,61 +1838,59 @@ static void nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) { struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; - unsigned long flags; - int i, ret; - - for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { - if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE) - continue; - - spin_lock_irqsave(&ctrl->lock, flags); - if (ctrl->flags & FCCTRL_TERMIO) { - ctrl->iocnt++; - aen_op->flags |= FCOP_FLAGS_TERMIO; - } - spin_unlock_irqrestore(&ctrl->lock, flags); + int i; - ret = __nvme_fc_abort_op(ctrl, aen_op); - if (ret) { - /* - * if __nvme_fc_abort_op failed the io wasn't - * active. Thus this call path is running in - * parallel to the io complete. Treat as non-error. - */ + /* ensure we've initialized the ops once */ + if (!(aen_op->flags & FCOP_FLAGS_AEN)) + return; - /* back out the flags/counters */ - spin_lock_irqsave(&ctrl->lock, flags); - if (ctrl->flags & FCCTRL_TERMIO) - ctrl->iocnt--; - aen_op->flags &= ~FCOP_FLAGS_TERMIO; - spin_unlock_irqrestore(&ctrl->lock, flags); - return; - } - } + for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) + __nvme_fc_abort_op(ctrl, aen_op); } -static inline int +static inline void __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, - struct nvme_fc_fcp_op *op) + struct nvme_fc_fcp_op *op, int opstate) { unsigned long flags; - bool complete_rq = false; - spin_lock_irqsave(&ctrl->lock, flags); - if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { - if (ctrl->flags & FCCTRL_TERMIO) { + if (opstate == FCPOP_STATE_ABORTED) { + spin_lock_irqsave(&ctrl->lock, flags); + if (test_bit(FCCTRL_TERMIO, &ctrl->flags) && + op->flags & FCOP_FLAGS_TERMIO) { if (!--ctrl->iocnt) wake_up(&ctrl->ioabort_wait); } + spin_unlock_irqrestore(&ctrl->lock, flags); } - if (op->flags & FCOP_FLAGS_RELEASED) - complete_rq = true; - else - op->flags |= FCOP_FLAGS_COMPLETE; - spin_unlock_irqrestore(&ctrl->lock, flags); +} + +static void +nvme_fc_ctrl_ioerr_work(struct work_struct *work) +{ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, ioerr_work); + + nvme_fc_error_recovery(ctrl, "transport detected io error"); +} + +/* + * nvme_fc_io_getuuid - Routine called to get the appid field + * associated with request by the lldd + * @req:IO request from nvme fc to driver + * Returns: UUID if there is an appid associated with VM or + * NULL if the user/libvirt has not set the appid to VM + */ +char *nvme_fc_io_getuuid(struct nvmefc_fcp_req *req) +{ + struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req); + struct request *rq = op->rq; - return complete_rq; + if (!IS_ENABLED(CONFIG_BLK_CGROUP_FC_APPID) || !rq || !rq->bio) + return NULL; + return blkcg_get_fc_appid(rq->bio); } +EXPORT_SYMBOL_GPL(nvme_fc_io_getuuid); static void nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) @@ -1263,7 +1904,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) struct nvme_command *sqe = &op->cmd_iu.sqe; __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); union nvme_result result; - bool complete_rq, terminate_assoc = true; + bool terminate_assoc = true; + int opstate; /* * WARNING: @@ -1302,17 +1944,23 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) * association to be terminated. */ + opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE); + fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma, sizeof(op->rsp_iu), DMA_FROM_DEVICE); - if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) - status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); - else if (freq->status) - status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); + if (opstate == FCPOP_STATE_ABORTED) + status = cpu_to_le16(NVME_SC_HOST_ABORTED_CMD << 1); + else if (freq->status) { + status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: io failed due to lldd error %d\n", + ctrl->cnum, freq->status); + } /* - * For the linux implementation, if we have an unsuccesful - * status, they blk-mq layer can typically be called with the + * For the linux implementation, if we have an unsuccessful + * status, the blk-mq layer can typically be called with the * non-zero status and the content of the cqe isn't important. */ if (status) @@ -1335,8 +1983,13 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) * no payload in the CQE by the transport. */ if (freq->transferred_length != - be32_to_cpu(op->cmd_iu.data_len)) { - status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); + be32_to_cpu(op->cmd_iu.data_len)) { + status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: io failed due to bad transfer " + "length: %d vs expected %d\n", + ctrl->cnum, freq->transferred_length, + be32_to_cpu(op->cmd_iu.data_len)); goto done; } result.u64 = 0; @@ -1351,9 +2004,19 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) (freq->rcv_rsplen / 4) || be32_to_cpu(op->rsp_iu.xfrd_len) != freq->transferred_length || - op->rsp_iu.status_code || + op->rsp_iu.ersp_result || sqe->common.command_id != cqe->command_id)) { - status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); + status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: io failed due to bad NVMe_ERSP: " + "iu len %d, xfr len %d vs %d, status code " + "%d, cmdid %d vs %d\n", + ctrl->cnum, be16_to_cpu(op->rsp_iu.iu_len), + be32_to_cpu(op->rsp_iu.xfrd_len), + freq->transferred_length, + op->rsp_iu.ersp_result, + sqe->common.command_id, + cqe->command_id); goto done; } result = cqe->result; @@ -1361,7 +2024,11 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) break; default: - status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); + status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: io failed due to odd NVMe_xRSP iu " + "len %d\n", + ctrl->cnum, freq->rcv_rsplen); goto done; } @@ -1370,27 +2037,21 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) done: if (op->flags & FCOP_FLAGS_AEN) { nvme_complete_async_event(&queue->ctrl->ctrl, status, &result); - complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); + __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); atomic_set(&op->state, FCPOP_STATE_IDLE); op->flags = FCOP_FLAGS_AEN; /* clear other flags */ nvme_fc_ctrl_put(ctrl); goto check_error; } - complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); - if (!complete_rq) { - if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { - status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); - if (blk_queue_dying(rq->q)) - status |= cpu_to_le16(NVME_SC_DNR << 1); - } - nvme_end_request(rq, status, result); - } else - __nvme_fc_final_op_cleanup(rq); + __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); + if (!nvme_try_complete_req(rq, status, result)) + nvme_fc_complete_rq(rq); check_error: - if (terminate_assoc) - nvme_fc_error_recovery(ctrl, "transport detected io error"); + if (terminate_assoc && + nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_RESETTING) + queue_work(nvme_reset_wq, &ctrl->ioerr_work); } static int @@ -1398,6 +2059,8 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op, struct request *rq, u32 rqno) { + struct nvme_fcp_op_w_sgl *op_w_sgl = + container_of(op, typeof(*op_w_sgl), op); struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; int ret = 0; @@ -1407,23 +2070,26 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, op->fcp_req.rspaddr = &op->rsp_iu; op->fcp_req.rsplen = sizeof(op->rsp_iu); op->fcp_req.done = nvme_fc_fcpio_done; - op->fcp_req.first_sgl = (struct scatterlist *)&op[1]; - op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE]; op->ctrl = ctrl; op->queue = queue; op->rq = rq; op->rqno = rqno; - cmdiu->scsi_id = NVME_CMD_SCSI_ID; + cmdiu->format_id = NVME_CMD_FORMAT_ID; cmdiu->fc_id = NVME_CMD_FC_ID; cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); + if (queue->qnum) + cmdiu->rsv_cat = fccmnd_set_cat_css(0, + (NVME_CC_CSS_NVM >> NVME_CC_CSS_SHIFT)); + else + cmdiu->rsv_cat = fccmnd_set_cat_admin(0); op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev, &op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE); if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) { dev_err(ctrl->dev, "FCP Op failed - cmdiu dma mapping failed.\n"); - ret = EFAULT; + ret = -EFAULT; goto out_on_error; } @@ -1433,7 +2099,7 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) { dev_err(ctrl->dev, "FCP Op failed - rspiu dma mapping failed.\n"); - ret = EFAULT; + ret = -EFAULT; } atomic_set(&op->state, FCPOP_STATE_IDLE); @@ -1445,12 +2111,20 @@ static int nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_fc_ctrl *ctrl = set->driver_data; - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(set->driver_data); + struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq); int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; + int res; - return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); + res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); + if (res) + return res; + op->op.fcp_req.first_sgl = op->sgl; + op->op.fcp_req.private = &op->priv[0]; + nvme_req(rq)->ctrl = &ctrl->ctrl; + nvme_req(rq)->cmd = &op->op.cmd_iu.sqe; + return res; } static int @@ -1459,34 +2133,35 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) struct nvme_fc_fcp_op *aen_op; struct nvme_fc_cmd_iu *cmdiu; struct nvme_command *sqe; - void *private; + void *private = NULL; int i, ret; aen_op = ctrl->aen_ops; - for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { - private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, + for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { + if (ctrl->lport->ops->fcprqst_priv_sz) { + private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, GFP_KERNEL); - if (!private) - return -ENOMEM; + if (!private) + return -ENOMEM; + } cmdiu = &aen_op->cmd_iu; sqe = &cmdiu->sqe; ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0], aen_op, (struct request *)NULL, - (AEN_CMDID_BASE + i)); + (NVME_AQ_BLK_MQ_DEPTH + i)); if (ret) { kfree(private); return ret; } aen_op->flags = FCOP_FLAGS_AEN; - aen_op->fcp_req.first_sgl = NULL; /* no sg list */ aen_op->fcp_req.private = private; memset(sqe, 0, sizeof(*sqe)); sqe->common.opcode = nvme_admin_async_event; /* Note: core layer may overwrite the sqe.command_id value */ - sqe->common.command_id = AEN_CMDID_BASE + i; + sqe->common.command_id = NVME_AQ_BLK_MQ_DEPTH + i; } return 0; } @@ -1497,11 +2172,9 @@ nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) struct nvme_fc_fcp_op *aen_op; int i; + cancel_work_sync(&ctrl->ctrl.async_event_work); aen_op = ctrl->aen_ops; - for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { - if (!aen_op->fcp_req.private) - continue; - + for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { __nvme_fc_exit_request(ctrl, aen_op); kfree(aen_op->fcp_req.private); @@ -1509,40 +2182,32 @@ nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) } } -static inline void -__nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, - unsigned int qidx) +static inline int +__nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int qidx) { + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(data); struct nvme_fc_queue *queue = &ctrl->queues[qidx]; hctx->driver_data = queue; queue->hctx = hctx; + return 0; } static int -nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, - unsigned int hctx_idx) +nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_fc_ctrl *ctrl = data; - - __nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1); - - return 0; + return __nvme_fc_init_hctx(hctx, data, hctx_idx + 1); } static int nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_fc_ctrl *ctrl = data; - - __nvme_fc_init_hctx(hctx, ctrl, hctx_idx); - - return 0; + return __nvme_fc_init_hctx(hctx, data, hctx_idx); } static void -nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size) +nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx) { struct nvme_fc_queue *queue; @@ -1550,7 +2215,7 @@ nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size) memset(queue, 0, sizeof(*queue)); queue->ctrl = ctrl; queue->qnum = idx; - atomic_set(&queue->csn, 1); + atomic_set(&queue->csn, 0); queue->dev = ctrl->dev; if (idx > 0) @@ -1558,8 +2223,6 @@ nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size) else queue->cmnd_capsule_len = sizeof(struct nvme_command); - queue->queue_size = queue_size; - /* * Considered whether we should allocate buffers for all SQEs * and CQEs and dma map them - mapping their respective entries @@ -1586,6 +2249,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags)) return; + clear_bit(NVME_FC_Q_LIVE, &queue->flags); /* * Current implementation never disconnects a single queue. * It always terminates a whole association. So there is never @@ -1593,7 +2257,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) */ queue->connection_id = 0; - clear_bit(NVME_FC_Q_CONNECTED, &queue->flags); + atomic_set(&queue->csn, 0); } static void @@ -1654,7 +2318,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) return 0; delete_queues: - for (; i >= 0; i--) + for (; i > 0; i--) __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i); return ret; } @@ -1672,6 +2336,8 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) break; + + set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags); } return ret; @@ -1683,7 +2349,7 @@ nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl) int i; for (i = 1; i < ctrl->ctrl.queue_count; i++) - nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize); + nvme_fc_init_queue(ctrl, i); } static void @@ -1693,26 +2359,17 @@ nvme_fc_ctrl_free(struct kref *ref) container_of(ref, struct nvme_fc_ctrl, ref); unsigned long flags; - if (ctrl->ctrl.tagset) { - blk_cleanup_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - } - /* remove from rport list */ spin_lock_irqsave(&ctrl->rport->lock, flags); list_del(&ctrl->ctrl_list); spin_unlock_irqrestore(&ctrl->rport->lock, flags); - blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); - blk_cleanup_queue(ctrl->ctrl.admin_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); - kfree(ctrl->queues); put_device(ctrl->dev); nvme_fc_rport_put(ctrl->rport); - ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); + ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum); if (ctrl->ctrl.opts) nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); @@ -1735,7 +2392,7 @@ nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) * controller. Called after last nvme_put_ctrl() call */ static void -nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) +nvme_fc_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); @@ -1744,54 +2401,160 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) nvme_fc_ctrl_put(ctrl); } +/* + * This routine is used by the transport when it needs to find active + * io on a queue that is to be terminated. The transport uses + * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke + * this routine to kill them on a 1 by 1 basis. + * + * As FC allocates FC exchange for each io, the transport must contact + * the LLDD to terminate the exchange, thus releasing the FC exchange. + * After terminating the exchange the LLDD will call the transport's + * normal io done path for the request, but it will have an aborted + * status. The done path will return the io request back to the block + * layer with an error status. + */ +static bool nvme_fc_terminate_exchange(struct request *req, void *data) +{ + struct nvme_ctrl *nctrl = data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); + + op->nreq.flags |= NVME_REQ_CANCELLED; + __nvme_fc_abort_op(ctrl, op); + return true; +} + +/* + * This routine runs through all outstanding commands on the association + * and aborts them. This routine is typically called by the + * delete_association routine. It is also called due to an error during + * reconnect. In that scenario, it is most likely a command that initializes + * the controller, including fabric Connect commands on io queues, that + * may have timed out or failed thus the io must be killed for the connect + * thread to see the error. + */ +static void +__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) +{ + int q; + + /* + * if aborting io, the queues are no longer good, mark them + * all as not live. + */ + if (ctrl->ctrl.queue_count > 1) { + for (q = 1; q < ctrl->ctrl.queue_count; q++) + clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[q].flags); + } + clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags); + + /* + * If io queues are present, stop them and terminate all outstanding + * ios on them. As FC allocates FC exchange for each io, the + * transport must contact the LLDD to terminate the exchange, + * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() + * to tell us what io's are busy and invoke a transport routine + * to kill them with the LLDD. After terminating the exchange + * the LLDD will call the transport's normal io done path, but it + * will have an aborted status. The done path will return the + * io requests back to the block layer as part of normal completions + * (but with error status). + */ + if (ctrl->ctrl.queue_count > 1) { + nvme_quiesce_io_queues(&ctrl->ctrl); + nvme_sync_io_queues(&ctrl->ctrl); + blk_mq_tagset_busy_iter(&ctrl->tag_set, + nvme_fc_terminate_exchange, &ctrl->ctrl); + blk_mq_tagset_wait_completed_request(&ctrl->tag_set); + if (start_queues) + nvme_unquiesce_io_queues(&ctrl->ctrl); + } + + /* + * Other transports, which don't have link-level contexts bound + * to sqe's, would try to gracefully shutdown the controller by + * writing the registers for shutdown and polling (call + * nvme_disable_ctrl()). Given a bunch of i/o was potentially + * just aborted and we will wait on those contexts, and given + * there was no indication of how live the controller is on the + * link, don't send more io to create more contexts for the + * shutdown. Let the controller fail via keepalive failure if + * its still present. + */ + + /* + * clean up the admin queue. Same thing as above. + */ + nvme_quiesce_admin_queue(&ctrl->ctrl); + blk_sync_queue(ctrl->ctrl.admin_q); + blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, + nvme_fc_terminate_exchange, &ctrl->ctrl); + blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); + if (start_queues) + nvme_unquiesce_admin_queue(&ctrl->ctrl); +} + static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) { - /* only proceed if in LIVE state - e.g. on first error */ - if (ctrl->ctrl.state != NVME_CTRL_LIVE) + enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); + + /* + * if an error (io timeout, etc) while (re)connecting, the remote + * port requested terminating of the association (disconnect_ls) + * or an error (timeout or abort) occurred on an io while creating + * the controller. Abort any ios on the association and let the + * create_association error path resolve things. + */ + if (state == NVME_CTRL_CONNECTING) { + __nvme_fc_abort_outstanding_ios(ctrl, true); + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: transport error during (re)connect\n", + ctrl->cnum); + return; + } + + /* Otherwise, only proceed if in LIVE state - e.g. on first error */ + if (state != NVME_CTRL_LIVE) return; dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: transport association error detected: %s\n", + "NVME-FC{%d}: transport association event: %s\n", ctrl->cnum, errmsg); dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: resetting controller\n", ctrl->cnum); - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { - dev_err(ctrl->ctrl.device, - "NVME-FC{%d}: error_recovery: Couldn't change state " - "to RECONNECTING\n", ctrl->cnum); - return; - } - nvme_reset_ctrl(&ctrl->ctrl); } -static enum blk_eh_timer_return -nvme_fc_timeout(struct request *rq, bool reserved) +static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; - int ret; - - if (reserved) - return BLK_EH_RESET_TIMER; - - ret = __nvme_fc_abort_op(ctrl, op); - if (ret) - /* io wasn't active to abort consider it done */ - return BLK_EH_HANDLED; + u16 qnum = op->queue->qnum; + struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; + struct nvme_command *sqe = &cmdiu->sqe; /* - * we can't individually ABTS an io without affecting the queue, - * thus killing the queue, adn thus the association. - * So resolve by performing a controller reset, which will stop - * the host/io stack, terminate the association on the link, - * and recreate an association on the link. + * Attempt to abort the offending command. Command completion + * will detect the aborted io and will fail the connection. */ - nvme_fc_error_recovery(ctrl, "io timeout error"); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d (%s) w10/11: " + "x%08x/x%08x\n", + ctrl->cnum, qnum, sqe->common.opcode, sqe->fabrics.fctype, + nvme_fabrics_opcode_str(qnum, sqe), + sqe->common.cdw10, sqe->common.cdw11); + if (__nvme_fc_abort_op(ctrl, op)) + nvme_fc_error_recovery(ctrl, "io timeout abort failed"); - return BLK_EH_HANDLED; + /* + * the io abort has been initiated. Have the reset timer + * restarted and the abort completion will complete the io + * shortly. Avoids a synchronous wait while the abort finishes. + */ + return BLK_EH_RESET_TIMER; } static int @@ -1799,27 +2562,26 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, struct nvme_fc_fcp_op *op) { struct nvmefc_fcp_req *freq = &op->fcp_req; - enum dma_data_direction dir; int ret; freq->sg_cnt = 0; - if (!blk_rq_payload_bytes(rq)) + if (!blk_rq_nr_phys_segments(rq)) return 0; freq->sg_table.sgl = freq->first_sgl; ret = sg_alloc_table_chained(&freq->sg_table, - blk_rq_nr_phys_segments(rq), freq->sg_table.sgl); + blk_rq_nr_phys_segments(rq), freq->sg_table.sgl, + NVME_INLINE_SG_CNT); if (ret) return -ENOMEM; - op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl); + op->nents = blk_rq_map_sg(rq, freq->sg_table.sgl); WARN_ON(op->nents > blk_rq_nr_phys_segments(rq)); - dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl, - op->nents, dir); + op->nents, rq_dma_dir(rq)); if (unlikely(freq->sg_cnt <= 0)) { - sg_free_table_chained(&freq->sg_table, true); + sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT); freq->sg_cnt = 0; return -EFAULT; } @@ -1840,12 +2602,9 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq, return; fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents, - ((rq_data_dir(rq) == WRITE) ? - DMA_TO_DEVICE : DMA_FROM_DEVICE)); - - nvme_cleanup_cmd(rq); + rq_dma_dir(rq)); - sg_free_table_chained(&freq->sg_table, true); + sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT); freq->sg_cnt = 0; } @@ -1861,7 +2620,7 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq, * as part of the exchange. The CQE is the last thing for the io, * which is transferred (explicitly or implicitly) with the RSP IU * sent on the exchange. After the CQE is received, the FC exchange is - * terminaed and the Exchange may be used on a different io. + * terminated and the Exchange may be used on a different io. * * The transport to LLDD api has the transport making a request for a * new fcp io request to the LLDD. The LLDD then allocates a FC exchange @@ -1880,23 +2639,20 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, { struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; struct nvme_command *sqe = &cmdiu->sqe; - u32 csn; - int ret; + int ret, opstate; /* * before attempting to send the io, check to see if we believe * the target device is present */ if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) - goto busy; + return BLK_STS_RESOURCE; if (!nvme_fc_ctrl_get(ctrl)) return BLK_STS_IOERR; /* format the FC-NVME CMD IU and fcp_req */ cmdiu->connection_id = cpu_to_be64(queue->connection_id); - csn = atomic_inc_return(&queue->csn); - cmdiu->csn = cpu_to_be32(csn); cmdiu->data_len = cpu_to_be32(data_len); switch (io_dir) { case NVMEFC_FCP_WRITE: @@ -1921,16 +2677,17 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, * as well as those by FC-NVME spec. */ WARN_ON_ONCE(sqe->common.metadata); - WARN_ON_ONCE(sqe->common.dptr.prp1); - WARN_ON_ONCE(sqe->common.dptr.prp2); sqe->common.flags |= NVME_CMD_SGL_METABUF; /* - * format SQE DPTR field per FC-NVME rules - * type=data block descr; subtype=offset; - * offset is currently 0. + * format SQE DPTR field per FC-NVME rules: + * type=0x5 Transport SGL Data Block Descriptor + * subtype=0xA Transport-specific value + * address=0 + * length=length of the data series */ - sqe->rw.dptr.sgl.type = NVME_SGL_FMT_OFFSET; + sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | + NVME_SGL_FMT_TRANSPORT_A; sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); sqe->rw.dptr.sgl.addr = 0; @@ -1951,15 +2708,33 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, atomic_set(&op->state, FCPOP_STATE_ACTIVE); if (!(op->flags & FCOP_FLAGS_AEN)) - blk_mq_start_request(op->rq); + nvme_start_request(op->rq); + cmdiu->csn = cpu_to_be32(atomic_inc_return(&queue->csn)); ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, &ctrl->rport->remoteport, queue->lldd_handle, &op->fcp_req); if (ret) { - if (!(op->flags & FCOP_FLAGS_AEN)) + /* + * If the lld fails to send the command is there an issue with + * the csn value? If the command that fails is the Connect, + * no - as the connection won't be live. If it is a command + * post-connect, it's possible a gap in csn may be created. + * Does this matter? As Linux initiators don't send fused + * commands, no. The gap would exist, but as there's nothing + * that depends on csn order to be delivered on the target + * side, it shouldn't hurt. It would be difficult for a + * target to even detect the csn gap as it has no idea when the + * cmd with the csn was supposed to arrive. + */ + opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE); + __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); + + if (!(op->flags & FCOP_FLAGS_AEN)) { nvme_fc_unmap_data(ctrl, op->rq, op); + nvme_cleanup_cmd(op->rq); + } nvme_fc_ctrl_put(ctrl); @@ -1967,16 +2742,10 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, ret != -EBUSY) return BLK_STS_IOERR; - goto busy; + return BLK_STS_RESOURCE; } return BLK_STS_OK; - -busy: - if (!(op->flags & FCOP_FLAGS_AEN) && queue->hctx) - blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY); - - return BLK_STS_RESOURCE; } static blk_status_t @@ -1988,190 +2757,103 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl = queue->ctrl; struct request *rq = bd->rq; struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); - struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; - struct nvme_command *sqe = &cmdiu->sqe; enum nvmefc_fcp_datadir io_dir; + bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags); u32 data_len; blk_status_t ret; - ret = nvme_setup_cmd(ns, rq, sqe); + if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || + !nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) + return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq); + + ret = nvme_setup_cmd(ns, rq); if (ret) return ret; - data_len = blk_rq_payload_bytes(rq); - if (data_len) + /* + * nvme core doesn't quite treat the rq opaquely. Commands such + * as WRITE ZEROES will return a non-zero rq payload_bytes yet + * there is no actual payload to be transferred. + * To get it right, key data transmission on there being 1 or + * more physical segments in the sg list. If there are no + * physical segments, there is no payload. + */ + if (blk_rq_nr_phys_segments(rq)) { + data_len = blk_rq_payload_bytes(rq); io_dir = ((rq_data_dir(rq) == WRITE) ? NVMEFC_FCP_WRITE : NVMEFC_FCP_READ); - else + } else { + data_len = 0; io_dir = NVMEFC_FCP_NODATA; + } - return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir); -} - -static struct blk_mq_tags * -nvme_fc_tagset(struct nvme_fc_queue *queue) -{ - if (queue->qnum == 0) - return queue->ctrl->admin_tag_set.tags[queue->qnum]; - - return queue->ctrl->tag_set.tags[queue->qnum - 1]; -} - -static int -nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) - -{ - struct nvme_fc_queue *queue = hctx->driver_data; - struct nvme_fc_ctrl *ctrl = queue->ctrl; - struct request *req; - struct nvme_fc_fcp_op *op; - - req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); - if (!req) - return 0; - - op = blk_mq_rq_to_pdu(req); - - if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) && - (ctrl->lport->ops->poll_queue)) - ctrl->lport->ops->poll_queue(&ctrl->lport->localport, - queue->lldd_handle); - return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE)); + return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir); } static void -nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) +nvme_fc_submit_async_event(struct nvme_ctrl *arg) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); struct nvme_fc_fcp_op *aen_op; - unsigned long flags; - bool terminating = false; blk_status_t ret; - if (aer_idx > NVME_FC_NR_AEN_COMMANDS) - return; - - spin_lock_irqsave(&ctrl->lock, flags); - if (ctrl->flags & FCCTRL_TERMIO) - terminating = true; - spin_unlock_irqrestore(&ctrl->lock, flags); - - if (terminating) + if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) return; - aen_op = &ctrl->aen_ops[aer_idx]; + aen_op = &ctrl->aen_ops[0]; ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0, NVMEFC_FCP_NODATA); if (ret) dev_err(ctrl->ctrl.device, - "failed async event work [%d]\n", aer_idx); + "failed async event work\n"); } static void -__nvme_fc_final_op_cleanup(struct request *rq) +nvme_fc_complete_rq(struct request *rq) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; atomic_set(&op->state, FCPOP_STATE_IDLE); - op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED | - FCOP_FLAGS_COMPLETE); + op->flags &= ~FCOP_FLAGS_TERMIO; nvme_fc_unmap_data(ctrl, rq, op); nvme_complete_rq(rq); nvme_fc_ctrl_put(ctrl); - -} - -static void -nvme_fc_complete_rq(struct request *rq) -{ - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); - struct nvme_fc_ctrl *ctrl = op->ctrl; - unsigned long flags; - bool completed = false; - - /* - * the core layer, on controller resets after calling - * nvme_shutdown_ctrl(), calls complete_rq without our - * calling blk_mq_complete_request(), thus there may still - * be live i/o outstanding with the LLDD. Means transport has - * to track complete calls vs fcpio_done calls to know what - * path to take on completes and dones. - */ - spin_lock_irqsave(&ctrl->lock, flags); - if (op->flags & FCOP_FLAGS_COMPLETE) - completed = true; - else - op->flags |= FCOP_FLAGS_RELEASED; - spin_unlock_irqrestore(&ctrl->lock, flags); - - if (completed) - __nvme_fc_final_op_cleanup(rq); } -/* - * This routine is used by the transport when it needs to find active - * io on a queue that is to be terminated. The transport uses - * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke - * this routine to kill them on a 1 by 1 basis. - * - * As FC allocates FC exchange for each io, the transport must contact - * the LLDD to terminate the exchange, thus releasing the FC exchange. - * After terminating the exchange the LLDD will call the transport's - * normal io done path for the request, but it will have an aborted - * status. The done path will return the io request back to the block - * layer with an error status. - */ -static void -nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) +static void nvme_fc_map_queues(struct blk_mq_tag_set *set) { - struct nvme_ctrl *nctrl = data; - struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); - unsigned long flags; - int status; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(set->driver_data); + int i; - if (!blk_mq_request_started(req)) - return; + for (i = 0; i < set->nr_maps; i++) { + struct blk_mq_queue_map *map = &set->map[i]; - spin_lock_irqsave(&ctrl->lock, flags); - if (ctrl->flags & FCCTRL_TERMIO) { - ctrl->iocnt++; - op->flags |= FCOP_FLAGS_TERMIO; - } - spin_unlock_irqrestore(&ctrl->lock, flags); - - status = __nvme_fc_abort_op(ctrl, op); - if (status) { - /* - * if __nvme_fc_abort_op failed the io wasn't - * active. Thus this call path is running in - * parallel to the io complete. Treat as non-error. - */ + if (!map->nr_queues) { + WARN_ON(i == HCTX_TYPE_DEFAULT); + continue; + } - /* back out the flags/counters */ - spin_lock_irqsave(&ctrl->lock, flags); - if (ctrl->flags & FCCTRL_TERMIO) - ctrl->iocnt--; - op->flags &= ~FCOP_FLAGS_TERMIO; - spin_unlock_irqrestore(&ctrl->lock, flags); - return; + /* Call LLDD map queue functionality if defined */ + if (ctrl->lport->ops->map_queues) + ctrl->lport->ops->map_queues(&ctrl->lport->localport, + map); + else + blk_mq_map_queues(map); } } - static const struct blk_mq_ops nvme_fc_mq_ops = { .queue_rq = nvme_fc_queue_rq, .complete = nvme_fc_complete_rq, .init_request = nvme_fc_init_request, .exit_request = nvme_fc_exit_request, - .reinit_request = nvme_fc_reinit_request, .init_hctx = nvme_fc_init_hctx, - .poll = nvme_fc_poll, .timeout = nvme_fc_timeout, + .map_queues = nvme_fc_map_queues, }; static int @@ -2181,7 +2863,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) unsigned int nr_io_queues; int ret; - nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), + nr_io_queues = min3(opts->nr_io_queues, num_online_cpus(), ctrl->lport->ops->max_hw_queues); ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) { @@ -2196,48 +2878,29 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) nvme_fc_init_io_queues(ctrl); - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_fc_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = 1; /* fabric connect */ - ctrl->tag_set.numa_node = NUMA_NO_NODE; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; - ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); + ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set, + &nvme_fc_mq_ops, 1, + struct_size_t(struct nvme_fcp_op_w_sgl, priv, + ctrl->lport->ops->fcprqst_priv_sz)); if (ret) return ret; - ctrl->ctrl.tagset = &ctrl->tag_set; - - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); - goto out_free_tag_set; - } - - ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) - goto out_cleanup_blk_queue; + goto out_cleanup_tagset; - ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) goto out_delete_hw_queues; + ctrl->ioq_live = true; + return 0; out_delete_hw_queues: nvme_fc_delete_hw_io_queues(ctrl); -out_cleanup_blk_queue: - blk_cleanup_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - blk_mq_free_tag_set(&ctrl->tag_set); +out_cleanup_tagset: + nvme_remove_io_tag_set(&ctrl->ctrl); nvme_fc_free_io_queues(ctrl); /* force put free routine to ignore io queues */ @@ -2247,13 +2910,14 @@ out_free_tag_set: } static int -nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) +nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + u32 prior_ioq_cnt = ctrl->ctrl.queue_count - 1; unsigned int nr_io_queues; int ret; - nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), + nr_io_queues = min3(opts->nr_io_queues, num_online_cpus(), ctrl->lport->ops->max_hw_queues); ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) { @@ -2262,27 +2926,33 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) return ret; } + if (!nr_io_queues && prior_ioq_cnt) { + dev_info(ctrl->ctrl.device, + "Fail Reconnect: At least 1 io queue " + "required (was %d)\n", prior_ioq_cnt); + return -ENOSPC; + } + ctrl->ctrl.queue_count = nr_io_queues + 1; /* check for io queues existing */ if (ctrl->ctrl.queue_count == 1) return 0; - nvme_fc_init_io_queues(ctrl); - - ret = blk_mq_reinit_tagset(&ctrl->tag_set); - if (ret) - goto out_free_io_queues; + if (prior_ioq_cnt != nr_io_queues) { + dev_info(ctrl->ctrl.device, + "reconnect: revising io queue count from %d to %d\n", + prior_ioq_cnt, nr_io_queues); + blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); + } - ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) goto out_free_io_queues; - ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) goto out_delete_hw_queues; - blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); - return 0; out_delete_hw_queues: @@ -2292,6 +2962,60 @@ out_free_io_queues: return ret; } +static void +nvme_fc_rport_active_on_lport(struct nvme_fc_rport *rport) +{ + struct nvme_fc_lport *lport = rport->lport; + + atomic_inc(&lport->act_rport_cnt); +} + +static void +nvme_fc_rport_inactive_on_lport(struct nvme_fc_rport *rport) +{ + struct nvme_fc_lport *lport = rport->lport; + u32 cnt; + + cnt = atomic_dec_return(&lport->act_rport_cnt); + if (cnt == 0 && lport->localport.port_state == FC_OBJSTATE_DELETED) + lport->ops->localport_delete(&lport->localport); +} + +static int +nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl) +{ + struct nvme_fc_rport *rport = ctrl->rport; + u32 cnt; + + if (test_and_set_bit(ASSOC_ACTIVE, &ctrl->flags)) + return 1; + + cnt = atomic_inc_return(&rport->act_ctrl_cnt); + if (cnt == 1) + nvme_fc_rport_active_on_lport(rport); + + return 0; +} + +static int +nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl) +{ + struct nvme_fc_rport *rport = ctrl->rport; + struct nvme_fc_lport *lport = rport->lport; + u32 cnt; + + /* clearing of ctrl->flags ASSOC_ACTIVE bit is in association delete */ + + cnt = atomic_dec_return(&rport->act_ctrl_cnt); + if (cnt == 0) { + if (rport->remoteport.port_state == FC_OBJSTATE_DELETED) + lport->ops->remoteport_delete(&rport->remoteport); + nvme_fc_rport_inactive_on_lport(rport); + } + + return 0; +} + /* * This routine restarts the controller on the host side, and * on the link side, recreates the controller association. @@ -2300,36 +3024,52 @@ static int nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; - u32 segs; + struct nvmefc_ls_rcv_op *disls = NULL; + unsigned long flags; int ret; - bool changed; ++ctrl->ctrl.nr_reconnects; + spin_lock_irqsave(&ctrl->rport->lock, flags); + if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) { + spin_unlock_irqrestore(&ctrl->rport->lock, flags); + return -ENODEV; + } + + if (nvme_fc_ctlr_active_on_rport(ctrl)) { + spin_unlock_irqrestore(&ctrl->rport->lock, flags); + return -ENOTUNIQ; + } + spin_unlock_irqrestore(&ctrl->rport->lock, flags); + + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: create association : host wwpn 0x%016llx " + " rport wwpn 0x%016llx: NQN \"%s\"\n", + ctrl->cnum, ctrl->lport->localport.port_name, + ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn); + + clear_bit(ASSOC_FAILED, &ctrl->flags); + /* * Create the admin queue */ - nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); - ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, - NVME_FC_AQ_BLKMQ_DEPTH); + NVME_AQ_DEPTH); if (ret) goto out_free_queue; ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], - NVME_FC_AQ_BLKMQ_DEPTH, - (NVME_FC_AQ_BLKMQ_DEPTH / 4)); + NVME_AQ_DEPTH, (NVME_AQ_DEPTH / 4)); if (ret) goto out_delete_hw_queue; - if (ctrl->ctrl.state != NVME_CTRL_NEW) - blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); - ret = nvmf_connect_admin_queue(&ctrl->ctrl); if (ret) goto out_disconnect_admin_queue; + set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags); + /* * Check controller capabilities * @@ -2337,46 +3077,51 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) * prior connection values */ - ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); - if (ret) { - dev_err(ctrl->ctrl.device, - "prop_get NVME_REG_CAP failed\n"); - goto out_disconnect_admin_queue; - } - - ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize); - - ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); + ret = nvme_enable_ctrl(&ctrl->ctrl); + if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) + ret = -EIO; if (ret) goto out_disconnect_admin_queue; - segs = min_t(u32, NVME_FC_MAX_SEGMENTS, - ctrl->lport->ops->max_sgl_segments); - ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9); + ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments; + ctrl->ctrl.max_hw_sectors = ctrl->ctrl.max_segments << + (ilog2(SZ_4K) - 9); + + nvme_unquiesce_admin_queue(&ctrl->ctrl); - ret = nvme_init_identify(&ctrl->ctrl); + ret = nvme_init_ctrl_finish(&ctrl->ctrl, false); if (ret) goto out_disconnect_admin_queue; - + if (test_bit(ASSOC_FAILED, &ctrl->flags)) { + ret = -EIO; + goto out_stop_keep_alive; + } /* sanity checks */ /* FC-NVME does not have other data in the capsule */ if (ctrl->ctrl.icdoff) { dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", ctrl->ctrl.icdoff); - goto out_disconnect_admin_queue; + ret = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; + goto out_stop_keep_alive; } /* FC-NVME supports normal SGL Data Block Descriptors */ + if (!nvme_ctrl_sgl_supported(&ctrl->ctrl)) { + dev_err(ctrl->ctrl.device, + "Mandatory sgls are not supported!\n"); + ret = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; + goto out_stop_keep_alive; + } if (opts->queue_size > ctrl->ctrl.maxcmd) { /* warn if maxcmd is lower than queue_size */ dev_warn(ctrl->ctrl.device, "queue_size %zu > ctrl maxcmd %u, reducing " - "to queue_size\n", + "to maxcmd\n", opts->queue_size, ctrl->ctrl.maxcmd); opts->queue_size = ctrl->ctrl.maxcmd; + ctrl->ctrl.sqsize = opts->queue_size - 1; } ret = nvme_fc_init_aen_ops(ctrl); @@ -2388,36 +3133,54 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) */ if (ctrl->ctrl.queue_count > 1) { - if (ctrl->ctrl.state == NVME_CTRL_NEW) + if (!ctrl->ioq_live) ret = nvme_fc_create_io_queues(ctrl); else - ret = nvme_fc_reinit_io_queues(ctrl); - if (ret) - goto out_term_aen_ops; + ret = nvme_fc_recreate_io_queues(ctrl); } + if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) + ret = -EIO; + if (ret) + goto out_term_aen_ops; - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); - WARN_ON_ONCE(!changed); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE)) { + ret = -EIO; + goto out_term_aen_ops; + } ctrl->ctrl.nr_reconnects = 0; - nvme_start_ctrl(&ctrl->ctrl); return 0; /* Success */ out_term_aen_ops: nvme_fc_term_aen_ops(ctrl); +out_stop_keep_alive: + nvme_stop_keep_alive(&ctrl->ctrl); out_disconnect_admin_queue: + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: create_assoc failed, assoc_id %llx ret %d\n", + ctrl->cnum, ctrl->association_id, ret); /* send a Disconnect(association) LS to fc-nvme target */ nvme_fc_xmt_disconnect_assoc(ctrl); + spin_lock_irqsave(&ctrl->lock, flags); + ctrl->association_id = 0; + disls = ctrl->rcv_disconn; + ctrl->rcv_disconn = NULL; + spin_unlock_irqrestore(&ctrl->lock, flags); + if (disls) + nvme_fc_xmt_ls_rsp(disls); out_delete_hw_queue: __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); out_free_queue: nvme_fc_free_queue(&ctrl->queues[0]); + clear_bit(ASSOC_ACTIVE, &ctrl->flags); + nvme_fc_ctlr_inactive_on_rport(ctrl); return ret; } + /* * This routine stops operation of the controller on the host side. * On the host os stack side: Admin and IO queues are stopped, @@ -2427,60 +3190,27 @@ out_free_queue: static void nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) { + struct nvmefc_ls_rcv_op *disls = NULL; unsigned long flags; + if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags)) + return; + spin_lock_irqsave(&ctrl->lock, flags); - ctrl->flags |= FCCTRL_TERMIO; + set_bit(FCCTRL_TERMIO, &ctrl->flags); ctrl->iocnt = 0; spin_unlock_irqrestore(&ctrl->lock, flags); - /* - * If io queues are present, stop them and terminate all outstanding - * ios on them. As FC allocates FC exchange for each io, the - * transport must contact the LLDD to terminate the exchange, - * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() - * to tell us what io's are busy and invoke a transport routine - * to kill them with the LLDD. After terminating the exchange - * the LLDD will call the transport's normal io done path, but it - * will have an aborted status. The done path will return the - * io requests back to the block layer as part of normal completions - * (but with error status). - */ - if (ctrl->ctrl.queue_count > 1) { - nvme_stop_queues(&ctrl->ctrl); - blk_mq_tagset_busy_iter(&ctrl->tag_set, - nvme_fc_terminate_exchange, &ctrl->ctrl); - } - - /* - * Other transports, which don't have link-level contexts bound - * to sqe's, would try to gracefully shutdown the controller by - * writing the registers for shutdown and polling (call - * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially - * just aborted and we will wait on those contexts, and given - * there was no indication of how live the controlelr is on the - * link, don't send more io to create more contexts for the - * shutdown. Let the controller fail via keepalive failure if - * its still present. - */ - - /* - * clean up the admin queue. Same thing as above. - * use blk_mq_tagset_busy_itr() and the transport routine to - * terminate the exchanges. - */ - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); - blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, - nvme_fc_terminate_exchange, &ctrl->ctrl); + __nvme_fc_abort_outstanding_ios(ctrl, false); /* kill the aens as they are a separate path */ nvme_fc_abort_aen_ops(ctrl); /* wait for all io that had to be aborted */ - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irq(&ctrl->lock); wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock); - ctrl->flags &= ~FCCTRL_TERMIO; - spin_unlock_irqrestore(&ctrl->lock, flags); + clear_bit(FCCTRL_TERMIO, &ctrl->flags); + spin_unlock_irq(&ctrl->lock); nvme_fc_term_aen_ops(ctrl); @@ -2493,6 +3223,18 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) if (ctrl->association_id) nvme_fc_xmt_disconnect_assoc(ctrl); + spin_lock_irqsave(&ctrl->lock, flags); + ctrl->association_id = 0; + disls = ctrl->rcv_disconn; + ctrl->rcv_disconn = NULL; + spin_unlock_irqrestore(&ctrl->lock, flags); + if (disls) + /* + * if a Disconnect Request was waiting for a response, send + * now that all ABTS's have been issued (and are complete). + */ + nvme_fc_xmt_ls_rsp(disls); + if (ctrl->ctrl.tagset) { nvme_fc_delete_hw_io_queues(ctrl); nvme_fc_free_io_queues(ctrl); @@ -2500,102 +3242,84 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); nvme_fc_free_queue(&ctrl->queues[0]); + + /* re-enable the admin_q so anything new can fast fail */ + nvme_unquiesce_admin_queue(&ctrl->ctrl); + + /* resume the io queues so that things will fast fail */ + nvme_unquiesce_io_queues(&ctrl->ctrl); + + nvme_fc_ctlr_inactive_on_rport(ctrl); } static void -nvme_fc_delete_ctrl_work(struct work_struct *work) +nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) { - struct nvme_fc_ctrl *ctrl = - container_of(work, struct nvme_fc_ctrl, delete_work); + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - cancel_work_sync(&ctrl->ctrl.reset_work); cancel_delayed_work_sync(&ctrl->connect_work); - nvme_stop_ctrl(&ctrl->ctrl); - nvme_remove_namespaces(&ctrl->ctrl); + /* * kill the association on the link side. this will block * waiting for io to terminate */ nvme_fc_delete_association(ctrl); + cancel_work_sync(&ctrl->ioerr_work); - /* - * tear down the controller - * After the last reference on the nvme ctrl is removed, - * the transport nvme_fc_nvme_ctrl_freed() callback will be - * invoked. From there, the transport will tear down it's - * logical queues and association. - */ - nvme_uninit_ctrl(&ctrl->ctrl); - - nvme_put_ctrl(&ctrl->ctrl); -} - -static bool -__nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl) -{ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return true; - - if (!queue_work(nvme_wq, &ctrl->delete_work)) - return true; - - return false; -} + if (ctrl->ctrl.tagset) + nvme_remove_io_tag_set(&ctrl->ctrl); -static int -__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) -{ - return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0; -} - -/* - * Request from nvme core layer to delete the controller - */ -static int -nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) -{ - struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - int ret; - - if (!kref_get_unless_zero(&ctrl->ctrl.kref)) - return -EBUSY; - - ret = __nvme_fc_del_ctrl(ctrl); - - if (!ret) - flush_workqueue(nvme_wq); - - nvme_put_ctrl(&ctrl->ctrl); - - return ret; + nvme_unquiesce_admin_queue(&ctrl->ctrl); + nvme_remove_admin_tag_set(&ctrl->ctrl); } static void nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) { - /* If we are resetting/deleting then do nothing */ - if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) { - WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || - ctrl->ctrl.state == NVME_CTRL_LIVE); - return; - } + struct nvme_fc_rport *rport = ctrl->rport; + struct nvme_fc_remote_port *portptr = &rport->remoteport; + unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ; + bool recon = true; - dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", - ctrl->cnum, status); + if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_CONNECTING) + return; - if (nvmf_should_reconnect(&ctrl->ctrl)) { + if (portptr->port_state == FC_OBJSTATE_ONLINE) { dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", - ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); - queue_delayed_work(nvme_wq, &ctrl->connect_work, - ctrl->ctrl.opts->reconnect_delay * HZ); + "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", + ctrl->cnum, status); + } else if (time_after_eq(jiffies, rport->dev_loss_end)) + recon = false; + + if (recon && nvmf_should_reconnect(&ctrl->ctrl, status)) { + if (portptr->port_state == FC_OBJSTATE_ONLINE) + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %ld " + "seconds\n", + ctrl->cnum, recon_delay / HZ); + else if (time_after(jiffies + recon_delay, rport->dev_loss_end)) + recon_delay = rport->dev_loss_end - jiffies; + + queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay); } else { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Max reconnect attempts (%d) " - "reached. Removing controller\n", - ctrl->cnum, ctrl->ctrl.nr_reconnects); - WARN_ON(__nvme_fc_schedule_delete_work(ctrl)); + if (portptr->port_state == FC_OBJSTATE_ONLINE) { + if (status > 0 && (status & NVME_STATUS_DNR)) + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: reconnect failure\n", + ctrl->cnum); + else + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts " + "(%d) reached.\n", + ctrl->cnum, ctrl->ctrl.nr_reconnects); + } else + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: dev_loss_tmo (%d) expired " + "while waiting for remoteport connectivity.\n", + ctrl->cnum, min_t(int, portptr->dev_loss_tmo, + (ctrl->ctrl.opts->max_reconnects * + ctrl->ctrl.opts->reconnect_delay))); + WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); } } @@ -2604,20 +3328,31 @@ nvme_fc_reset_ctrl_work(struct work_struct *work) { struct nvme_fc_ctrl *ctrl = container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); - int ret; nvme_stop_ctrl(&ctrl->ctrl); + /* will block will waiting for io to terminate */ nvme_fc_delete_association(ctrl); - ret = nvme_fc_create_association(ctrl); - if (ret) - nvme_fc_reconnect_or_delete(ctrl, ret); - else - dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: error_recovery: Couldn't change state " + "to CONNECTING\n", ctrl->cnum); + + if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) { + if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to schedule connect " + "after reset\n", ctrl->cnum); + } else { + flush_delayed_work(&ctrl->connect_work); + } + } else { + nvme_fc_reconnect_or_delete(ctrl, -ENOTCONN); + } } + static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .name = "fc", .module = THIS_MODULE, @@ -2625,10 +3360,12 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, - .free_ctrl = nvme_fc_nvme_ctrl_freed, + .subsystem_reset = nvmf_subsystem_reset, + .free_ctrl = nvme_fc_free_ctrl, .submit_async_event = nvme_fc_submit_async_event, - .delete_ctrl = nvme_fc_del_nvme_ctrl, + .delete_ctrl = nvme_fc_delete_ctrl, .get_address = nvmf_get_address, + .get_virt_boundary = nvmf_get_virt_boundary, }; static void @@ -2645,7 +3382,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work) nvme_fc_reconnect_or_delete(ctrl, ret); else dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: controller reconnect complete\n", + "NVME-FC{%d}: controller connect complete\n", ctrl->cnum); } @@ -2655,19 +3392,44 @@ static const struct blk_mq_ops nvme_fc_admin_mq_ops = { .complete = nvme_fc_complete_rq, .init_request = nvme_fc_init_request, .exit_request = nvme_fc_exit_request, - .reinit_request = nvme_fc_reinit_request, .init_hctx = nvme_fc_init_admin_hctx, .timeout = nvme_fc_timeout, }; -static struct nvme_ctrl * -nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, - struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) +/* + * Fails a controller request if it matches an existing controller + * (association) with the same tuple: + * <Host NQN, Host ID, local FC port, remote FC port, SUBSYS NQN> + * + * The ports don't need to be compared as they are intrinsically + * already matched by the port pointers supplied. + */ +static bool +nvme_fc_existing_controller(struct nvme_fc_rport *rport, + struct nvmf_ctrl_options *opts) { struct nvme_fc_ctrl *ctrl; unsigned long flags; - int ret, idx; + bool found = false; + + spin_lock_irqsave(&rport->lock, flags); + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { + found = nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts); + if (found) + break; + } + spin_unlock_irqrestore(&rport->lock, flags); + + return found; +} + +static struct nvme_fc_ctrl * +nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, + struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) +{ + struct nvme_fc_ctrl *ctrl; + int ret, idx, ctrl_loss_tmo; if (!(rport->remoteport.port_role & (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { @@ -2675,31 +3437,53 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, goto out_fail; } + if (!opts->duplicate_connect && + nvme_fc_existing_controller(rport, opts)) { + ret = -EALREADY; + goto out_fail; + } + ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) { ret = -ENOMEM; goto out_fail; } - idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); + idx = ida_alloc(&nvme_fc_ctrl_cnt, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; goto out_free_ctrl; } + /* + * if ctrl_loss_tmo is being enforced and the default reconnect delay + * is being used, change to a shorter reconnect delay for FC. + */ + if (opts->max_reconnects != -1 && + opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY && + opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) { + ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay; + opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO; + opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, + opts->reconnect_delay); + } + ctrl->ctrl.opts = opts; + ctrl->ctrl.nr_reconnects = 0; INIT_LIST_HEAD(&ctrl->ctrl_list); ctrl->lport = lport; ctrl->rport = rport; ctrl->dev = lport->dev; ctrl->cnum = idx; + ctrl->ioq_live = false; + init_waitqueue_head(&ctrl->ioabort_wait); get_device(ctrl->dev); kref_init(&ctrl->ref); - INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); + INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work); spin_lock_init(&ctrl->lock); /* io queue count */ @@ -2710,6 +3494,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; + ctrl->ctrl.cntlid = 0xffff; ret = -ENOMEM; ctrl->queues = kcalloc(ctrl->ctrl.queue_count, @@ -2717,28 +3502,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, if (!ctrl->queues) goto out_free_ida; - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; - ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ - ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; - ctrl->admin_tag_set.driver_data = ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; - - ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); - if (ret) - goto out_free_queues; - - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - ret = PTR_ERR(ctrl->ctrl.admin_q); - goto out_free_admin_tag_set; - } + nvme_fc_init_queue(ctrl, 0); /* * Would have been nice to init io queues tag set as well. @@ -2749,62 +3513,98 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); if (ret) - goto out_cleanup_admin_q; + goto out_free_queues; + if (lport->dev) + ctrl->ctrl.numa_node = dev_to_node(lport->dev); - /* at this point, teardown path changes to ref counting on nvme ctrl */ + return ctrl; + +out_free_queues: + kfree(ctrl->queues); +out_free_ida: + put_device(ctrl->dev); + ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum); +out_free_ctrl: + kfree(ctrl); +out_fail: + /* exit via here doesn't follow ctlr ref points */ + return ERR_PTR(ret); +} + +static struct nvme_ctrl * +nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, + struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) +{ + struct nvme_fc_ctrl *ctrl; + unsigned long flags; + int ret; + + ctrl = nvme_fc_alloc_ctrl(dev, opts, lport, rport); + if (IS_ERR(ctrl)) + return ERR_CAST(ctrl); + + ret = nvme_add_ctrl(&ctrl->ctrl); + if (ret) + goto out_put_ctrl; + + ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, + &nvme_fc_admin_mq_ops, + struct_size_t(struct nvme_fcp_op_w_sgl, priv, + ctrl->lport->ops->fcprqst_priv_sz)); + if (ret) + goto fail_ctrl; spin_lock_irqsave(&rport->lock, flags); list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - ret = nvme_fc_create_association(ctrl); - if (ret) { - ctrl->ctrl.opts = NULL; - /* initiate nvme ctrl ref counting teardown */ - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); - - /* Remove core ctrl ref. */ - nvme_put_ctrl(&ctrl->ctrl); - - /* as we're past the point where we transition to the ref - * counting teardown path, if we return a bad pointer here, - * the calling routine, thinking it's prior to the - * transition, will do an rport put. Since the teardown - * path also does a rport put, we do an extra get here to - * so proper order/teardown happens. - */ - nvme_fc_rport_get(rport); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum); + goto fail_ctrl; + } - if (ret > 0) - ret = -EIO; - return ERR_PTR(ret); + if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to schedule initial connect\n", + ctrl->cnum); + goto fail_ctrl; } - kref_get(&ctrl->ctrl.kref); + flush_delayed_work(&ctrl->connect_work); dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", - ctrl->cnum, ctrl->ctrl.opts->subsysnqn); + "NVME-FC{%d}: new ctrl: NQN \"%s\", hostnqn: %s\n", + ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl), opts->host->nqn); return &ctrl->ctrl; -out_cleanup_admin_q: - blk_cleanup_queue(ctrl->ctrl.admin_q); -out_free_admin_tag_set: - blk_mq_free_tag_set(&ctrl->admin_tag_set); -out_free_queues: - kfree(ctrl->queues); -out_free_ida: - put_device(ctrl->dev); - ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); -out_free_ctrl: - kfree(ctrl); -out_fail: - /* exit via here doesn't follow ctlr ref points */ - return ERR_PTR(ret); -} +fail_ctrl: + nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); + cancel_work_sync(&ctrl->ioerr_work); + cancel_work_sync(&ctrl->ctrl.reset_work); + cancel_delayed_work_sync(&ctrl->connect_work); + + ctrl->ctrl.opts = NULL; + /* initiate nvme ctrl ref counting teardown */ + nvme_uninit_ctrl(&ctrl->ctrl); + +out_put_ctrl: + /* Remove core ctrl ref. */ + nvme_put_ctrl(&ctrl->ctrl); + + /* as we're past the point where we transition to the ref + * counting teardown path, if we return a bad pointer here, + * the calling routine, thinking it's prior to the + * transition, will do an rport put. Since the teardown + * path also does a rport put, we do an extra get here to + * so proper order/teardown happens. + */ + nvme_fc_rport_get(rport); + + return ERR_PTR(-EIO); +} struct nvmet_fc_traddr { u64 nn; @@ -2835,7 +3635,7 @@ nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) substring_t wwn = { name, &name[sizeof(name)-1] }; int nnoffset, pnoffset; - /* validate it string one of the 2 allowed formats */ + /* validate if string is one of the 2 allowed formats */ if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], @@ -2894,12 +3694,14 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) spin_lock_irqsave(&nvme_fc_lock, flags); list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { if (lport->localport.node_name != laddr.nn || - lport->localport.port_name != laddr.pn) + lport->localport.port_name != laddr.pn || + lport->localport.port_state != FC_OBJSTATE_ONLINE) continue; list_for_each_entry(rport, &lport->endp_list, endp_list) { if (rport->remoteport.node_name != raddr.nn || - rport->remoteport.port_name != raddr.pn) + rport->remoteport.port_name != raddr.pn || + rport->remoteport.port_state != FC_OBJSTATE_ONLINE) continue; /* if fail to get reference fall through. Will error */ @@ -2916,35 +3718,251 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) } spin_unlock_irqrestore(&nvme_fc_lock, flags); + pr_warn("%s: %s - %s combination not found\n", + __func__, opts->traddr, opts->host_traddr); return ERR_PTR(-ENOENT); } static struct nvmf_transport_ops nvme_fc_transport = { .name = "fc", + .module = THIS_MODULE, .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, .create_ctrl = nvme_fc_create_ctrl, }; +/* Arbitrary successive failures max. With lots of subsystems could be high */ +#define DISCOVERY_MAX_FAIL 20 + +static ssize_t nvme_fc_nvme_discovery_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long flags; + LIST_HEAD(local_disc_list); + struct nvme_fc_lport *lport; + struct nvme_fc_rport *rport; + int failcnt = 0; + + spin_lock_irqsave(&nvme_fc_lock, flags); +restart: + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { + list_for_each_entry(rport, &lport->endp_list, endp_list) { + if (!nvme_fc_lport_get(lport)) + continue; + if (!nvme_fc_rport_get(rport)) { + /* + * This is a temporary condition. Upon restart + * this rport will be gone from the list. + * + * Revert the lport put and retry. Anything + * added to the list already will be skipped (as + * they are no longer list_empty). Loops should + * resume at rports that were not yet seen. + */ + nvme_fc_lport_put(lport); + + if (failcnt++ < DISCOVERY_MAX_FAIL) + goto restart; + + pr_err("nvme_discovery: too many reference " + "failures\n"); + goto process_local_list; + } + if (list_empty(&rport->disc_list)) + list_add_tail(&rport->disc_list, + &local_disc_list); + } + } + +process_local_list: + while (!list_empty(&local_disc_list)) { + rport = list_first_entry(&local_disc_list, + struct nvme_fc_rport, disc_list); + list_del_init(&rport->disc_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + lport = rport->lport; + /* signal discovery. Won't hurt if it repeats */ + nvme_fc_signal_discovery_scan(lport, rport); + nvme_fc_rport_put(rport); + nvme_fc_lport_put(lport); + + spin_lock_irqsave(&nvme_fc_lock, flags); + } + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return count; +} + +static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); + +#ifdef CONFIG_BLK_CGROUP_FC_APPID +/* Parse the cgroup id from a buf and return the length of cgrpid */ +static int fc_parse_cgrpid(const char *buf, u64 *id) +{ + char cgrp_id[16+1]; + int cgrpid_len, j; + + memset(cgrp_id, 0x0, sizeof(cgrp_id)); + for (cgrpid_len = 0, j = 0; cgrpid_len < 17; cgrpid_len++) { + if (buf[cgrpid_len] != ':') + cgrp_id[cgrpid_len] = buf[cgrpid_len]; + else { + j = 1; + break; + } + } + if (!j) + return -EINVAL; + if (kstrtou64(cgrp_id, 16, id) < 0) + return -EINVAL; + return cgrpid_len; +} + +/* + * Parse and update the appid in the blkcg associated with the cgroupid. + */ +static ssize_t fc_appid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + size_t orig_count = count; + u64 cgrp_id; + int appid_len = 0; + int cgrpid_len = 0; + char app_id[FC_APPID_LEN]; + int ret = 0; + + if (buf[count-1] == '\n') + count--; + + if ((count > (16+1+FC_APPID_LEN)) || (!strchr(buf, ':'))) + return -EINVAL; + + cgrpid_len = fc_parse_cgrpid(buf, &cgrp_id); + if (cgrpid_len < 0) + return -EINVAL; + appid_len = count - cgrpid_len - 1; + if (appid_len > FC_APPID_LEN) + return -EINVAL; + + memset(app_id, 0x0, sizeof(app_id)); + memcpy(app_id, &buf[cgrpid_len+1], appid_len); + ret = blkcg_set_fc_appid(app_id, cgrp_id, sizeof(app_id)); + if (ret < 0) + return ret; + return orig_count; +} +static DEVICE_ATTR(appid_store, 0200, NULL, fc_appid_store); +#endif /* CONFIG_BLK_CGROUP_FC_APPID */ + +static struct attribute *nvme_fc_attrs[] = { + &dev_attr_nvme_discovery.attr, +#ifdef CONFIG_BLK_CGROUP_FC_APPID + &dev_attr_appid_store.attr, +#endif + NULL +}; + +static const struct attribute_group nvme_fc_attr_group = { + .attrs = nvme_fc_attrs, +}; + +static const struct attribute_group *nvme_fc_attr_groups[] = { + &nvme_fc_attr_group, + NULL +}; + +static struct class fc_class = { + .name = "fc", + .dev_groups = nvme_fc_attr_groups, +}; + static int __init nvme_fc_init_module(void) { - return nvmf_register_transport(&nvme_fc_transport); + int ret; + + /* + * NOTE: + * It is expected that in the future the kernel will combine + * the FC-isms that are currently under scsi and now being + * added to by NVME into a new standalone FC class. The SCSI + * and NVME protocols and their devices would be under this + * new FC class. + * + * As we need something to post FC-specific udev events to, + * specifically for nvme probe events, start by creating the + * new device class. When the new standalone FC class is + * put in place, this code will move to a more generic + * location for the class. + */ + ret = class_register(&fc_class); + if (ret) { + pr_err("couldn't register class fc\n"); + return ret; + } + + /* + * Create a device for the FC-centric udev events + */ + fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL, + "fc_udev_device"); + if (IS_ERR(fc_udev_device)) { + pr_err("couldn't create fc_udev device!\n"); + ret = PTR_ERR(fc_udev_device); + goto out_destroy_class; + } + + ret = nvmf_register_transport(&nvme_fc_transport); + if (ret) + goto out_destroy_device; + + return 0; + +out_destroy_device: + device_destroy(&fc_class, MKDEV(0, 0)); +out_destroy_class: + class_unregister(&fc_class); + + return ret; +} + +static void +nvme_fc_delete_controllers(struct nvme_fc_rport *rport) +{ + struct nvme_fc_ctrl *ctrl; + + spin_lock(&rport->lock); + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: transport unloading: deleting ctrl\n", + ctrl->cnum); + nvme_delete_ctrl(&ctrl->ctrl); + } + spin_unlock(&rport->lock); } static void __exit nvme_fc_exit_module(void) { - /* sanity check - all lports should be removed */ - if (!list_empty(&nvme_fc_lport_list)) - pr_warn("%s: localport list not empty\n", __func__); + struct nvme_fc_lport *lport; + struct nvme_fc_rport *rport; + unsigned long flags; + + spin_lock_irqsave(&nvme_fc_lock, flags); + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) + list_for_each_entry(rport, &lport->endp_list, endp_list) + nvme_fc_delete_controllers(rport); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + flush_workqueue(nvme_delete_wq); nvmf_unregister_transport(&nvme_fc_transport); - ida_destroy(&nvme_fc_local_port_cnt); - ida_destroy(&nvme_fc_ctrl_cnt); + device_destroy(&fc_class, MKDEV(0, 0)); + class_unregister(&fc_class); } module_init(nvme_fc_init_module); module_exit(nvme_fc_exit_module); +MODULE_DESCRIPTION("NVMe host FC transport driver"); MODULE_LICENSE("GPL v2"); |
