nvme: make fabrics command run on a separate request queue

We have a fundamental issue that fabric commands use the admin_q. The reason is, that admin-connect, register reads and writes and admin commands cannot be guaranteed ordering while we are running controller resets. For example, when we reset a controller we perform: 1. disable the controller 2. teardown the admin queue 3. re-establish the admin queue 4. enable the controller In order to perform (3), we need to unquiesce the admin queue, however we may have some admin commands that are already pending on the quiesced admin_q and will immediate execute when we unquiesce it before we execute (4). The host must not send admin commands to the controller before enabling the controller. To fix this, we have the fabric commands (admin connect and property get/set, but not I/O queue connect) use a separate fabrics_q and make sure to quiesce the admin_q before we disable the controller, and unquiesce it only after we enable the controller. This fixes the error prints from nvmet in a controller reset storm test: kernel: nvmet: got cmd 6 while CC.EN == 0 on qid = 0 Which indicate that the host is sending an admin command when the controller is not enabled. Reviewed-by: James Smart <james.smart@broadcom.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
author: Sagi Grimberg <sagi@grimberg.me> 2019-08-02 19:33:59 -0700
committer: Sagi Grimberg <sagi@grimberg.me> 2019-08-29 12:55:03 -0700
commit: e7832cb48a654cd12b2bc9181b2f0ad49d526ac6 (patch)
tree: 465c951b26754e06326e378aee3309dea3529911 /drivers/nvme/host/rdma.c
parent: d38e9f04ebf667d9cb8185b45bff747485f1d3e9 (diff)
1 files changed, 17 insertions, 2 deletions
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 5143e2a5d54c..0ef05a75c428 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -751,6 +751,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
 {
 	if (remove) {
 		blk_cleanup_queue(ctrl->ctrl.admin_q);
+		blk_cleanup_queue(ctrl->ctrl.fabrics_q);
 		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
 	}
 	if (ctrl->async_event_sqe.data) {
@@ -792,10 +793,16 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 			goto out_free_async_qe;
 		}
 
+		ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
+		if (IS_ERR(ctrl->ctrl.fabrics_q)) {
+			error = PTR_ERR(ctrl->ctrl.fabrics_q);
+			goto out_free_tagset;
+		}
+
 		ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
 		if (IS_ERR(ctrl->ctrl.admin_q)) {
 			error = PTR_ERR(ctrl->ctrl.admin_q);
-			goto out_free_tagset;
+			goto out_cleanup_fabrics_q;
 		}
 	}
 
@@ -810,6 +817,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 	ctrl->ctrl.max_hw_sectors =
 		(ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9);
 
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+
 	error = nvme_init_identify(&ctrl->ctrl);
 	if (error)
 		goto out_stop_queue;
@@ -821,6 +830,9 @@ out_stop_queue:
 out_cleanup_queue:
 	if (new)
 		blk_cleanup_queue(ctrl->ctrl.admin_q);
+out_cleanup_fabrics_q:
+	if (new)
+		blk_cleanup_queue(ctrl->ctrl.fabrics_q);
 out_free_tagset:
 	if (new)
 		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
@@ -895,7 +907,8 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
 			nvme_cancel_request, &ctrl->ctrl);
 		blk_mq_tagset_wait_completed_request(ctrl->ctrl.admin_tagset);
 	}
-	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+	if (remove)
+		blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_destroy_admin_queue(ctrl, remove);
 }
 
@@ -1046,6 +1059,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	nvme_rdma_teardown_io_queues(ctrl, false);
 	nvme_start_queues(&ctrl->ctrl);
 	nvme_rdma_teardown_admin_queue(ctrl, false);
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
 		/* state change failure is ok if we're in DELETING state */
@@ -1858,6 +1872,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 	cancel_delayed_work_sync(&ctrl->reconnect_work);
 
 	nvme_rdma_teardown_io_queues(ctrl, shutdown);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	if (shutdown)
 		nvme_shutdown_ctrl(&ctrl->ctrl);
 	else
author	Sagi Grimberg <sagi@grimberg.me>	2019-08-02 19:33:59 -0700
committer	Sagi Grimberg <sagi@grimberg.me>	2019-08-29 12:55:03 -0700
commit	e7832cb48a654cd12b2bc9181b2f0ad49d526ac6 (patch)
tree	465c951b26754e06326e378aee3309dea3529911 /drivers/nvme/host/rdma.c
parent	d38e9f04ebf667d9cb8185b45bff747485f1d3e9 (diff)