summaryrefslogtreecommitdiff
path: root/drivers/nvme/host/rdma.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/rdma.c')
-rw-r--r--drivers/nvme/host/rdma.c225
1 files changed, 136 insertions, 89 deletions
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 92a03ff5fb4d..03644ecf68d2 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -50,8 +50,8 @@
(NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
struct nvme_rdma_device {
- struct ib_device *dev;
- struct ib_pd *pd;
+ struct ib_device *dev;
+ struct ib_pd *pd;
struct kref ref;
struct list_head entry;
};
@@ -79,8 +79,8 @@ struct nvme_rdma_request {
};
enum nvme_rdma_queue_flags {
- NVME_RDMA_Q_LIVE = 0,
- NVME_RDMA_Q_DELETING = 1,
+ NVME_RDMA_Q_ALLOCATED = 0,
+ NVME_RDMA_Q_LIVE = 1,
};
struct nvme_rdma_queue {
@@ -105,7 +105,6 @@ struct nvme_rdma_ctrl {
/* other member variables */
struct blk_mq_tag_set tag_set;
- struct work_struct delete_work;
struct work_struct err_work;
struct nvme_rdma_qe async_event_sqe;
@@ -274,6 +273,9 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq)
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
int ret = 0;
+ if (WARN_ON_ONCE(!req->mr))
+ return 0;
+
ib_dereg_mr(req->mr);
req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
@@ -434,11 +436,9 @@ out_err:
static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
{
- struct nvme_rdma_device *dev;
- struct ib_device *ibdev;
+ struct nvme_rdma_device *dev = queue->device;
+ struct ib_device *ibdev = dev->dev;
- dev = queue->device;
- ibdev = dev->dev;
rdma_destroy_qp(queue->cm_id);
ib_free_cq(queue->ib_cq);
@@ -544,11 +544,11 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
ret = nvme_rdma_wait_for_cm(queue);
if (ret) {
dev_info(ctrl->ctrl.device,
- "rdma_resolve_addr wait failed (%d).\n", ret);
+ "rdma connection establishment failed (%d)\n", ret);
goto out_destroy_cm_id;
}
- clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
+ set_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags);
return 0;
@@ -568,7 +568,7 @@ static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
{
- if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags))
+ if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
return;
nvme_rdma_destroy_queue_ib(queue);
@@ -670,11 +670,10 @@ out_free_queues:
return ret;
}
-static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, bool admin)
+static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl,
+ struct blk_mq_tag_set *set)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
- struct blk_mq_tag_set *set = admin ?
- &ctrl->admin_tag_set : &ctrl->tag_set;
blk_mq_free_tag_set(set);
nvme_rdma_dev_put(ctrl->device);
@@ -699,6 +698,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->driver_data = ctrl;
set->nr_hw_queues = 1;
set->timeout = ADMIN_TIMEOUT;
+ set->flags = BLK_MQ_F_NO_SCHED;
} else {
set = &ctrl->tag_set;
memset(set, 0, sizeof(*set));
@@ -744,7 +744,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
nvme_rdma_stop_queue(&ctrl->queues[0]);
if (remove) {
blk_cleanup_queue(ctrl->ctrl.admin_q);
- nvme_rdma_free_tagset(&ctrl->ctrl, true);
+ nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
}
nvme_rdma_free_queue(&ctrl->queues[0]);
}
@@ -774,8 +774,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
goto out_free_tagset;
}
} else {
- error = blk_mq_reinit_tagset(&ctrl->admin_tag_set,
- nvme_rdma_reinit_request);
+ error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
if (error)
goto out_free_queue;
}
@@ -819,7 +818,7 @@ out_cleanup_queue:
blk_cleanup_queue(ctrl->ctrl.admin_q);
out_free_tagset:
if (new)
- nvme_rdma_free_tagset(&ctrl->ctrl, true);
+ nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
out_free_queue:
nvme_rdma_free_queue(&ctrl->queues[0]);
return error;
@@ -831,7 +830,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
nvme_rdma_stop_io_queues(ctrl);
if (remove) {
blk_cleanup_queue(ctrl->ctrl.connect_q);
- nvme_rdma_free_tagset(&ctrl->ctrl, false);
+ nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
}
nvme_rdma_free_io_queues(ctrl);
}
@@ -855,8 +854,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
goto out_free_tag_set;
}
} else {
- ret = blk_mq_reinit_tagset(&ctrl->tag_set,
- nvme_rdma_reinit_request);
+ ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
if (ret)
goto out_free_io_queues;
@@ -875,7 +873,7 @@ out_cleanup_connect_q:
blk_cleanup_queue(ctrl->ctrl.connect_q);
out_free_tag_set:
if (new)
- nvme_rdma_free_tagset(&ctrl->ctrl, false);
+ nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
out_free_io_queues:
nvme_rdma_free_io_queues(ctrl);
return ret;
@@ -914,7 +912,7 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
dev_info(ctrl->ctrl.device, "Removing controller...\n");
- queue_work(nvme_wq, &ctrl->delete_work);
+ nvme_delete_ctrl(&ctrl->ctrl);
}
}
@@ -927,10 +925,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
++ctrl->ctrl.nr_reconnects;
- if (ctrl->ctrl.queue_count > 1)
- nvme_rdma_destroy_io_queues(ctrl, false);
-
- nvme_rdma_destroy_admin_queue(ctrl, false);
ret = nvme_rdma_configure_admin_queue(ctrl, false);
if (ret)
goto requeue;
@@ -938,7 +932,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
if (ctrl->ctrl.queue_count > 1) {
ret = nvme_rdma_configure_io_queues(ctrl, false);
if (ret)
- goto requeue;
+ goto destroy_admin;
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -948,14 +942,17 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
return;
}
- ctrl->ctrl.nr_reconnects = 0;
-
nvme_start_ctrl(&ctrl->ctrl);
- dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
+ dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
+ ctrl->ctrl.nr_reconnects);
+
+ ctrl->ctrl.nr_reconnects = 0;
return;
+destroy_admin:
+ nvme_rdma_destroy_admin_queue(ctrl, false);
requeue:
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
ctrl->ctrl.nr_reconnects);
@@ -971,17 +968,15 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
- nvme_rdma_stop_io_queues(ctrl);
- }
- blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
- nvme_rdma_stop_queue(&ctrl->queues[0]);
-
- /* We must take care of fastfail/requeue all our inflight requests */
- if (ctrl->ctrl.queue_count > 1)
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_cancel_request, &ctrl->ctrl);
+ nvme_rdma_destroy_io_queues(ctrl, false);
+ }
+
+ blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_cancel_request, &ctrl->ctrl);
+ nvme_rdma_destroy_admin_queue(ctrl, false);
/*
* queues are not a live anymore, so restart the queues to fail fast
@@ -1057,7 +1052,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
if (!blk_rq_bytes(rq))
return;
- if (req->mr->need_inval) {
+ if (req->mr->need_inval && test_bit(NVME_RDMA_Q_LIVE, &req->queue->flags)) {
res = nvme_rdma_inv_rkey(queue, req);
if (unlikely(res < 0)) {
dev_err(ctrl->ctrl.device,
@@ -1582,6 +1577,10 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+ dev_warn(req->queue->ctrl->ctrl.device,
+ "I/O %d QID %d timeout, reset controller\n",
+ rq->tag, nvme_rdma_queue_idx(req->queue));
+
/* queue error recovery */
nvme_rdma_error_recovery(req->queue->ctrl);
@@ -1756,50 +1755,9 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
nvme_rdma_destroy_admin_queue(ctrl, shutdown);
}
-static void nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl)
{
- nvme_remove_namespaces(&ctrl->ctrl);
- nvme_rdma_shutdown_ctrl(ctrl, true);
- nvme_uninit_ctrl(&ctrl->ctrl);
- nvme_put_ctrl(&ctrl->ctrl);
-}
-
-static void nvme_rdma_del_ctrl_work(struct work_struct *work)
-{
- struct nvme_rdma_ctrl *ctrl = container_of(work,
- struct nvme_rdma_ctrl, delete_work);
-
- nvme_stop_ctrl(&ctrl->ctrl);
- nvme_rdma_remove_ctrl(ctrl);
-}
-
-static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
-{
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
- return -EBUSY;
-
- if (!queue_work(nvme_wq, &ctrl->delete_work))
- return -EBUSY;
-
- return 0;
-}
-
-static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl)
-{
- struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
- int ret = 0;
-
- /*
- * Keep a reference until all work is flushed since
- * __nvme_rdma_del_ctrl can free the ctrl mem
- */
- if (!kref_get_unless_zero(&ctrl->ctrl.kref))
- return -EBUSY;
- ret = __nvme_rdma_del_ctrl(ctrl);
- if (!ret)
- flush_work(&ctrl->delete_work);
- nvme_put_ctrl(&ctrl->ctrl);
- return ret;
+ nvme_rdma_shutdown_ctrl(to_rdma_ctrl(ctrl), true);
}
static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
@@ -1823,7 +1781,11 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
- WARN_ON_ONCE(!changed);
+ if (!changed) {
+ /* state change failure is ok if we're in DELETING state */
+ WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
+ return;
+ }
nvme_start_ctrl(&ctrl->ctrl);
@@ -1831,7 +1793,10 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
out_fail:
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
- nvme_rdma_remove_ctrl(ctrl);
+ nvme_remove_namespaces(&ctrl->ctrl);
+ nvme_rdma_shutdown_ctrl(ctrl, true);
+ nvme_uninit_ctrl(&ctrl->ctrl);
+ nvme_put_ctrl(&ctrl->ctrl);
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -1843,10 +1808,88 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.reg_write32 = nvmf_reg_write32,
.free_ctrl = nvme_rdma_free_ctrl,
.submit_async_event = nvme_rdma_submit_async_event,
- .delete_ctrl = nvme_rdma_del_ctrl,
+ .delete_ctrl = nvme_rdma_delete_ctrl,
.get_address = nvmf_get_address,
+ .reinit_request = nvme_rdma_reinit_request,
};
+static inline bool
+__nvme_rdma_options_match(struct nvme_rdma_ctrl *ctrl,
+ struct nvmf_ctrl_options *opts)
+{
+ char *stdport = __stringify(NVME_RDMA_IP_PORT);
+
+
+ if (!nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts) ||
+ strcmp(opts->traddr, ctrl->ctrl.opts->traddr))
+ return false;
+
+ if (opts->mask & NVMF_OPT_TRSVCID &&
+ ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) {
+ if (strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid))
+ return false;
+ } else if (opts->mask & NVMF_OPT_TRSVCID) {
+ if (strcmp(opts->trsvcid, stdport))
+ return false;
+ } else if (ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) {
+ if (strcmp(stdport, ctrl->ctrl.opts->trsvcid))
+ return false;
+ }
+ /*
+ * else, neither side set NVMF_OPT_TRSVCID, so both use stdport and
+ * it's a match. Fall through to the next checks.
+ */
+
+ /*
+ * Checking the local address is rough. In most cases, one
+ * is not specified and the host port is selected by the stack.
+ *
+ * Assume no match if:
+ * - both the request and the existing controller specify a local
+ *   address (host_traddr) and the two strings differ, or
+ * - only one of the request and the existing controller specifies
+ *   a local address (admin using specific host_traddr when it
+ *   matters).
+ */
+ if (opts->mask & NVMF_OPT_HOST_TRADDR &&
+ ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) {
+ if (strcmp(opts->host_traddr, ctrl->ctrl.opts->host_traddr))
+ return false;
+ } else if (opts->mask & NVMF_OPT_HOST_TRADDR ||
+ ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
+ return false;
+ /*
+ * If neither the request nor the existing controller specified a
+ * host address, assume it's a match as everything else matched.
+ */
+
+ return true;
+}
+
+/*
+ * Returns true if the connection request in @opts matches an existing
+ * controller (association) on nvme_rdma_ctrl_list with the same tuple:
+ * <Host NQN, Host ID, local address, remote address, remote port, SUBSYS NQN>
+ * The list is walked under nvme_rdma_ctrl_mutex and the walk stops at the
+ * first match. The caller in nvme_rdma_create_ctrl() fails the request
+ * with -EALREADY on a match unless opts->duplicate_connect is set.
+ *
+ * If the local address is not specified in the request, it will only
+ * match an existing controller that has all the other parameters the
+ * same and no local address specified either.
+ *
+ * The ports don't need to be compared as they are intrinsically
+ * already matched by the port pointers supplied.
+ * NOTE(review): no port pointers are passed to this function, and
+ * __nvme_rdma_options_match() does compare trsvcid (falling back to
+ * NVME_RDMA_IP_PORT when unset) -- the sentence above looks stale;
+ * confirm and drop it.
+ */
+static bool
+nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
+{
+ struct nvme_rdma_ctrl *ctrl;
+ bool found = false;
+
+ mutex_lock(&nvme_rdma_ctrl_mutex);
+ list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
+ found = __nvme_rdma_options_match(ctrl, opts);
+ if (found)
+ break;
+ }
+ mutex_unlock(&nvme_rdma_ctrl_mutex);
+
+ return found;
+}
+
static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
@@ -1883,6 +1926,11 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
}
}
+ if (!opts->duplicate_connect && nvme_rdma_existing_controller(opts)) {
+ ret = -EALREADY;
+ goto out_free_ctrl;
+ }
+
ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
0 /* no quirks, we're perfect! */);
if (ret)
@@ -1891,7 +1939,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
INIT_DELAYED_WORK(&ctrl->reconnect_work,
nvme_rdma_reconnect_ctrl_work);
INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
- INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
@@ -1950,7 +1997,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
- kref_get(&ctrl->ctrl.kref);
+ nvme_get_ctrl(&ctrl->ctrl);
mutex_lock(&nvme_rdma_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
@@ -1995,7 +2042,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
dev_info(ctrl->ctrl.device,
"Removing ctrl: NQN \"%s\", addr %pISp\n",
ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
- __nvme_rdma_del_ctrl(ctrl);
+ nvme_delete_ctrl(&ctrl->ctrl);
}
mutex_unlock(&nvme_rdma_ctrl_mutex);