From 0b857b44b5e445dc850cd91c45ce6edeb7797480 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sun, 31 Jul 2016 00:27:39 -0700 Subject: nvme-rdma: Don't leak uninitialized memory in connect request private data Zero out the full nvme_rdma_cm_req structure before sending it. Otherwise we end up leaking kernel memory in the reserved field, which might break forward compatibility in the future. Signed-off-by: Roland Dreier Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3e3ce2b0424e..b96b88369871 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1269,7 +1269,7 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) { struct nvme_rdma_ctrl *ctrl = queue->ctrl; struct rdma_conn_param param = { }; - struct nvme_rdma_cm_req priv; + struct nvme_rdma_cm_req priv = { }; int ret; param.qp_num = queue->qp->qp_num; -- cgit From 5f372eb3e76317b4fe4ba53ad1547f39fc883350 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 31 Jul 2016 18:43:15 +0300 Subject: nvme-rdma: Queue ns scanning after a sucessful reconnection On an ordered target shutdown, the target can send a AEN on a namespace removal, this will trigger the host to queue ns-list query. The shutdown will trigger error recovery which will attepmt periodic reconnect. We can hit a race where the ns rescanning fails (error recovery kicked in and we're not connected) causing removing all the namespaces and when we reconnect we won't see any namespaces for this controller. So, queue a namespace rescan after we successfully reconnected to the target. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b96b88369871..7fd1d735ced5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -748,8 +748,10 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); - if (ctrl->queue_count > 1) + if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); + nvme_queue_scan(&ctrl->ctrl); + } dev_info(ctrl->ctrl.device, "Successfully reconnected\n"); -- cgit From 57de5a0a40db97bb390d3ac1f4c2e74b9f3515c3 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 14 Jul 2016 17:39:47 +0300 Subject: nvme-rdma: Fix device removal handling Device removal sequence may have crashed because the controller (and admin queue space) was freed before we destroyed the admin queue resources. Thus we want to destroy the admin queue and only then queue controller deletion and wait for it to complete. More specifically we: 1. own the controller deletion (make sure we are not competing with another deletion). 2. get rid of inflight reconnects if exists (which also destroy and create queues). 3. destroy the queue. 4. safely queue controller deletion (and wait for it to complete). Reported-by: Steve Wise Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 7fd1d735ced5..3ffec3728abe 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -169,7 +169,6 @@ MODULE_PARM_DESC(register_always, static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); -static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl); /* XXX: really should move to a generic header sooner or later.. */ static inline void put_unaligned_le24(u32 val, u8 *p) @@ -1320,37 +1319,39 @@ out_destroy_queue_ib: * that caught the event. Since we hold the callout until the controller * deletion is completed, we'll deadlock if the controller deletion will * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership - * of destroying this queue before-hand, destroy the queue resources - * after the controller deletion completed with the exception of destroying - * the cm_id implicitely by returning a non-zero rc to the callout. + * of destroying this queue before-hand, destroy the queue resources, + * then queue the controller deletion which won't destroy this queue and + * we destroy the cm_id implicitely by returning a non-zero rc to the callout. */ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) { struct nvme_rdma_ctrl *ctrl = queue->ctrl; - int ret, ctrl_deleted = 0; + int ret; - /* First disable the queue so ctrl delete won't free it */ - if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) - goto out; + /* Own the controller deletion */ + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) + return 0; - /* delete the controller */ - ret = __nvme_rdma_del_ctrl(ctrl); - if (!ret) { - dev_warn(ctrl->ctrl.device, - "Got rdma device removal event, deleting ctrl\n"); - flush_work(&ctrl->delete_work); + dev_warn(ctrl->ctrl.device, + "Got rdma device removal event, deleting ctrl\n"); - /* Return non-zero so the cm_id will destroy implicitly */ - ctrl_deleted = 1; + /* Get rid of reconnect work if its running */ + cancel_delayed_work_sync(&ctrl->reconnect_work); + /* Disable the queue so ctrl delete won't free it */ + if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) { /* Free this queue ourselves */ - rdma_disconnect(queue->cm_id); - ib_drain_qp(queue->qp); + nvme_rdma_stop_queue(queue); nvme_rdma_destroy_queue_ib(queue); + + /* Return non-zero so the cm_id will destroy implicitly */ + ret = 1; } -out: - return ctrl_deleted; + /* Queue controller deletion */ + queue_work(nvme_rdma_wq, &ctrl->delete_work); + flush_work(&ctrl->delete_work); + return ret; } static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, -- cgit From 2461a8dd38bea3cb5b1c1f0323794483292fb03f Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 24 Jul 2016 09:29:51 +0300 Subject: nvme-rdma: Remove duplicate call to nvme_remove_namespaces nvme_uninit_ctrl already does that for us. Note that we reordered nvme_rdma_shutdown_ctrl and nvme_uninit_ctrl, this is perfectly fine because we actually want ctrl uninit (aen, scan cancellation and namespaces removal) to happen before we shutdown the rdma resources. Also, centralize the deletion work and the dead controller removal work code duplication into __nvme_rdma_shutdown_ctrl that accepts a shutdown boolean. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3ffec3728abe..1279bc292253 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1660,15 +1660,20 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) nvme_rdma_destroy_admin_queue(ctrl); } +static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) +{ + nvme_uninit_ctrl(&ctrl->ctrl); + if (shutdown) + nvme_rdma_shutdown_ctrl(ctrl); + nvme_put_ctrl(&ctrl->ctrl); +} + static void nvme_rdma_del_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, delete_work); - nvme_remove_namespaces(&ctrl->ctrl); - nvme_rdma_shutdown_ctrl(ctrl); - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); + __nvme_rdma_remove_ctrl(ctrl, true); } static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl) @@ -1701,9 +1706,7 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work) struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, delete_work); - nvme_remove_namespaces(&ctrl->ctrl); - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); + __nvme_rdma_remove_ctrl(ctrl, false); } static void nvme_rdma_reset_ctrl_work(struct work_struct *work) -- cgit From a34ca17a9717fe607cd58285a1704cb6526cf561 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 24 Jul 2016 09:22:19 +0300 Subject: nvme-rdma: Free the I/O tags when we delete the controller If we wait until we free the controller (free_ctrl) we might lose our rdma device without any notification while we still have open resources (tags mrs and dma mappings). Instead, destroy the tags with their rdma resources once we delete the device and not when freeing it. Note that we don't do that in nvme_rdma_shutdown_ctrl because controller reset uses it as well and we want to give active I/O a chance to complete successfully. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 1279bc292253..6378dc94aeaf 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -686,11 +686,6 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) list_del(&ctrl->list); mutex_unlock(&nvme_rdma_ctrl_mutex); - if (ctrl->ctrl.tagset) { - blk_cleanup_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_rdma_dev_put(ctrl->device); - } kfree(ctrl->queues); nvmf_free_options(nctrl->opts); free_ctrl: @@ -1665,6 +1660,13 @@ static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) nvme_uninit_ctrl(&ctrl->ctrl); if (shutdown) nvme_rdma_shutdown_ctrl(ctrl); + + if (ctrl->ctrl.tagset) { + blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_free_tag_set(&ctrl->tag_set); + nvme_rdma_dev_put(ctrl->device); + } + nvme_put_ctrl(&ctrl->ctrl); } -- cgit From 45862ebcc4883b1b6bc0701cd15cb2b68b140c5d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 24 Jul 2016 09:26:16 +0300 Subject: nvme-rdma: Make sure to shutdown the controller if we can Relying on ctrl state in nvme_rdma_shutdown_ctrl is wrong because it will never be NVME_CTRL_LIVE (delete_ctrl or reset_ctrl invoked it). Instead, check that the admin queue is connected. Note that it is safe because we can never see a copmeting thread trying to destroy the admin queue (reset or delete controller). Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 6378dc94aeaf..f4b836869d2c 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1646,7 +1646,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) nvme_rdma_free_io_queues(ctrl); } - if (ctrl->ctrl.state == NVME_CTRL_LIVE) + if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags)) nvme_shutdown_ctrl(&ctrl->ctrl); blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); -- cgit From 3ef1b4b298d98ccb3cc895baf1b18f7f9d073bee Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 4 Aug 2016 13:46:19 +0300 Subject: nvme-rdma: start async event handler after reconnecting to a controller When we reset or reconnect to a controller, we are cancelling the async event handler so we can safely re-establish resources, but we need to remember to start it again when we successfully reconnect. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index f4b836869d2c..e82434ab3a28 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -745,6 +745,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); nvme_queue_scan(&ctrl->ctrl); + nvme_queue_async_events(&ctrl->ctrl); } dev_info(ctrl->ctrl.device, "Successfully reconnected\n"); @@ -1747,6 +1748,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); nvme_queue_scan(&ctrl->ctrl); + nvme_queue_async_events(&ctrl->ctrl); } return; -- cgit From e3266378bdbca82c2854fc612fa9a391eba1f173 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 4 Aug 2016 17:37:40 +0300 Subject: nvme-rdma: Remove unused includes Signed-off-by: Sagi Grimberg Reviewed-by: Steve Wise --- drivers/nvme/host/rdma.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index e82434ab3a28..8d2875b4c56d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -12,13 +12,11 @@ * more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include #include #include #include #include #include -#include #include #include #include @@ -26,7 +24,6 @@ #include #include #include -#include #include #include -- cgit From c21377f8366c95440d533edbe47d070f662c62ef Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 11 Aug 2016 09:35:57 -0600 Subject: nvme: Suspend all queues before deletion When nvme_delete_queue fails in the first pass of the nvme_disable_io_queues() loop, we return early, failing to suspend all of the IO queues. Later, on the nvme_pci_disable path, this causes us to disable MSI without actually having freed all the IRQs, which triggers the BUG_ON in free_msi_irqs(), as show below. This patch refactors nvme_disable_io_queues to suspend all queues before start submitting delete queue commands. This way, we ensure that we have at least returned every IRQ before continuing with the removal path. [ 487.529200] kernel BUG at ../drivers/pci/msi.c:368! cpu 0x46: Vector: 700 (Program Check) at [c0000078c5b83650] pc: c000000000627a50: free_msi_irqs+0x90/0x200 lr: c000000000627a40: free_msi_irqs+0x80/0x200 sp: c0000078c5b838d0 msr: 9000000100029033 current = 0xc0000078c5b40000 paca = 0xc000000002bd7600 softe: 0 irq_happened: 0x01 pid = 1376, comm = kworker/70:1H kernel BUG at ../drivers/pci/msi.c:368! Linux version 4.7.0.mainline+ (root@iod76) (gcc version 5.3.1 20160413 (Ubuntu/IBM 5.3.1-14ubuntu2.1) ) #104 SMP Fri Jul 29 09:20:17 CDT 2016 enter ? for help [c0000078c5b83920] d0000000363b0cd8 nvme_dev_disable+0x208/0x4f0 [nvme] [c0000078c5b83a10] d0000000363b12a4 nvme_timeout+0xe4/0x250 [nvme] [c0000078c5b83ad0] c0000000005690e4 blk_mq_rq_timed_out+0x64/0x110 [c0000078c5b83b40] c00000000056c930 bt_for_each+0x160/0x170 [c0000078c5b83bb0] c00000000056d928 blk_mq_queue_tag_busy_iter+0x78/0x110 [c0000078c5b83c00] c0000000005675d8 blk_mq_timeout_work+0xd8/0x1b0 [c0000078c5b83c50] c0000000000e8cf0 process_one_work+0x1e0/0x590 [c0000078c5b83ce0] c0000000000e9148 worker_thread+0xa8/0x660 [c0000078c5b83d80] c0000000000f2090 kthread+0x110/0x130 [c0000078c5b83e30] c0000000000095f0 ret_from_kernel_thread+0x5c/0x6c Signed-off-by: Gabriel Krisman Bertazi Cc: Brian King Cc: Keith Busch Cc: linux-nvme@lists.infradead.org Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'drivers/nvme/host') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d7c33f9361aa..8dcf5a960951 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1543,15 +1543,10 @@ static void nvme_disable_io_queues(struct nvme_dev *dev) reinit_completion(&dev->ioq_wait); retry: timeout = ADMIN_TIMEOUT; - for (; i > 0; i--) { - struct nvme_queue *nvmeq = dev->queues[i]; - - if (!pass) - nvme_suspend_queue(nvmeq); - if (nvme_delete_queue(nvmeq, opcode)) + for (; i > 0; i--, sent++) + if (nvme_delete_queue(dev->queues[i], opcode)) break; - ++sent; - } + while (sent--) { timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout); if (timeout == 0) @@ -1693,11 +1688,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_stop_queues(&dev->ctrl); csts = readl(dev->bar + NVME_REG_CSTS); } + + for (i = dev->queue_count - 1; i > 0; i--) + nvme_suspend_queue(dev->queues[i]); + if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { - for (i = dev->queue_count - 1; i >= 0; i--) { - struct nvme_queue *nvmeq = dev->queues[i]; - nvme_suspend_queue(nvmeq); - } + nvme_suspend_queue(dev->queues[0]); } else { nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); -- cgit