summary | refs | log | tree | commit | diff
path: root/drivers/nvme/target/rdma.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/target/rdma.c')
-rw-r--r-- drivers/nvme/target/rdma.c 167
1 files changed, 93 insertions, 74 deletions
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 3a0f2c170f4c..2a4536ef6184 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -16,7 +16,7 @@
#include <linux/string.h>
#include <linux/wait.h>
#include <linux/inet.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
@@ -39,6 +39,8 @@
#define NVMET_RDMA_BACKLOG 128
+#define NVMET_RDMA_DISCRETE_RSP_TAG -1
+
struct nvmet_rdma_srq;
struct nvmet_rdma_cmd {
@@ -53,7 +55,6 @@ struct nvmet_rdma_cmd {
enum {
NVMET_RDMA_REQ_INLINE_DATA = (1 << 0),
- NVMET_RDMA_REQ_INVALIDATE_RKEY = (1 << 1),
};
struct nvmet_rdma_rsp {
@@ -76,7 +77,7 @@ struct nvmet_rdma_rsp {
u32 invalidate_rkey;
struct list_head wait_list;
- struct list_head free_list;
+ int tag;
};
enum nvmet_rdma_queue_state {
@@ -99,8 +100,7 @@ struct nvmet_rdma_queue {
struct nvmet_sq nvme_sq;
struct nvmet_rdma_rsp *rsps;
- struct list_head free_rsps;
- spinlock_t rsps_lock;
+ struct sbitmap rsp_tags;
struct nvmet_rdma_cmd *cmds;
struct work_struct release_work;
@@ -173,7 +173,8 @@ static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_rsp *r);
static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
- struct nvmet_rdma_rsp *r);
+ struct nvmet_rdma_rsp *r,
+ int tag);
static const struct nvmet_fabrics_ops nvmet_rdma_ops;
@@ -211,15 +212,12 @@ static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
static inline struct nvmet_rdma_rsp *
nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
{
- struct nvmet_rdma_rsp *rsp;
- unsigned long flags;
+ struct nvmet_rdma_rsp *rsp = NULL;
+ int tag;
- spin_lock_irqsave(&queue->rsps_lock, flags);
- rsp = list_first_entry_or_null(&queue->free_rsps,
- struct nvmet_rdma_rsp, free_list);
- if (likely(rsp))
- list_del(&rsp->free_list);
- spin_unlock_irqrestore(&queue->rsps_lock, flags);
+ tag = sbitmap_get(&queue->rsp_tags);
+ if (tag >= 0)
+ rsp = &queue->rsps[tag];
if (unlikely(!rsp)) {
int ret;
@@ -227,13 +225,12 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
if (unlikely(!rsp))
return NULL;
- ret = nvmet_rdma_alloc_rsp(queue->dev, rsp);
+ ret = nvmet_rdma_alloc_rsp(queue->dev, rsp,
+ NVMET_RDMA_DISCRETE_RSP_TAG);
if (unlikely(ret)) {
kfree(rsp);
return NULL;
}
-
- rsp->allocated = true;
}
return rsp;
@@ -242,17 +239,13 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
static inline void
nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
{
- unsigned long flags;
-
- if (unlikely(rsp->allocated)) {
+ if (unlikely(rsp->tag == NVMET_RDMA_DISCRETE_RSP_TAG)) {
nvmet_rdma_free_rsp(rsp->queue->dev, rsp);
kfree(rsp);
return;
}
- spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
- list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
- spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
+ sbitmap_clear_bit(&rsp->queue->rsp_tags, rsp->tag);
}
static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev,
@@ -405,7 +398,7 @@ static void nvmet_rdma_free_cmds(struct nvmet_rdma_device *ndev,
}
static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
- struct nvmet_rdma_rsp *r)
+ struct nvmet_rdma_rsp *r, int tag)
{
/* NVMe CQE / RDMA SEND */
r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL);
@@ -433,6 +426,7 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
r->read_cqe.done = nvmet_rdma_read_data_done;
/* Data Out / RDMA WRITE */
r->write_cqe.done = nvmet_rdma_write_data_done;
+ r->tag = tag;
return 0;
@@ -455,33 +449,33 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
{
struct nvmet_rdma_device *ndev = queue->dev;
int nr_rsps = queue->recv_queue_size * 2;
- int ret = -EINVAL, i;
+ int ret = -ENOMEM, i;
+
+ if (sbitmap_init_node(&queue->rsp_tags, nr_rsps, -1, GFP_KERNEL,
+ NUMA_NO_NODE, false, true))
+ goto out;
queue->rsps = kcalloc(nr_rsps, sizeof(struct nvmet_rdma_rsp),
GFP_KERNEL);
if (!queue->rsps)
- goto out;
+ goto out_free_sbitmap;
for (i = 0; i < nr_rsps; i++) {
struct nvmet_rdma_rsp *rsp = &queue->rsps[i];
- ret = nvmet_rdma_alloc_rsp(ndev, rsp);
+ ret = nvmet_rdma_alloc_rsp(ndev, rsp, i);
if (ret)
goto out_free;
-
- list_add_tail(&rsp->free_list, &queue->free_rsps);
}
return 0;
out_free:
- while (--i >= 0) {
- struct nvmet_rdma_rsp *rsp = &queue->rsps[i];
-
- list_del(&rsp->free_list);
- nvmet_rdma_free_rsp(ndev, rsp);
- }
+ while (--i >= 0)
+ nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
kfree(queue->rsps);
+out_free_sbitmap:
+ sbitmap_free(&queue->rsp_tags);
out:
return ret;
}
@@ -491,13 +485,10 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue)
struct nvmet_rdma_device *ndev = queue->dev;
int i, nr_rsps = queue->recv_queue_size * 2;
- for (i = 0; i < nr_rsps; i++) {
- struct nvmet_rdma_rsp *rsp = &queue->rsps[i];
-
- list_del(&rsp->free_list);
- nvmet_rdma_free_rsp(ndev, rsp);
- }
+ for (i = 0; i < nr_rsps; i++)
+ nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
kfree(queue->rsps);
+ sbitmap_free(&queue->rsp_tags);
}
static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
@@ -587,8 +578,8 @@ static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi,
if (control & NVME_RW_PRINFO_PRCHK_REF)
domain->sig.dif.ref_remap = true;
- domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
- domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
+ domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.lbat);
+ domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.lbatm);
domain->sig.dif.app_escape = true;
if (pi_type == NVME_NS_DPS_PI_TYPE3)
domain->sig.dif.ref_escape = true;
@@ -722,7 +713,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
struct rdma_cm_id *cm_id = rsp->queue->cm_id;
struct ib_send_wr *first_wr;
- if (rsp->flags & NVMET_RDMA_REQ_INVALIDATE_RKEY) {
+ if (rsp->invalidate_rkey) {
rsp->send_wr.opcode = IB_WR_SEND_WITH_INV;
rsp->send_wr.ex.invalidate_rkey = rsp->invalidate_rkey;
} else {
@@ -861,12 +852,12 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
if (!nvme_is_write(rsp->req.cmd)) {
rsp->req.error_loc =
offsetof(struct nvme_common_command, opcode);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
if (off + len > rsp->queue->dev->inline_data_size) {
pr_err("invalid inline data offset!\n");
- return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
+ return NVME_SC_SGL_INVALID_OFFSET | NVME_STATUS_DNR;
}
/* no data command? */
@@ -905,10 +896,8 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
goto error_out;
rsp->n_rdma += ret;
- if (invalidate) {
+ if (invalidate)
rsp->invalidate_rkey = key;
- rsp->flags |= NVMET_RDMA_REQ_INVALIDATE_RKEY;
- }
return 0;
@@ -930,7 +919,7 @@ static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)
pr_err("invalid SGL subtype: %#x\n", sgl->type);
rsp->req.error_loc =
offsetof(struct nvme_common_command, dptr);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
case NVME_KEY_SGL_FMT_DATA_DESC:
switch (sgl->type & 0xf) {
@@ -942,12 +931,12 @@ static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)
pr_err("invalid SGL subtype: %#x\n", sgl->type);
rsp->req.error_loc =
offsetof(struct nvme_common_command, dptr);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
default:
pr_err("invalid SGL type: %#x\n", sgl->type);
rsp->req.error_loc = offsetof(struct nvme_common_command, dptr);
- return NVME_SC_SGL_INVALID_TYPE | NVME_SC_DNR;
+ return NVME_SC_SGL_INVALID_TYPE | NVME_STATUS_DNR;
}
}
@@ -1007,6 +996,27 @@ out_err:
nvmet_req_complete(&cmd->req, status);
}
+/*
+ * Deal with a received command on a queue the caller saw as not live.
+ *
+ * Returns false if the queue turns out to be live once state_lock is held;
+ * @rsp is then left untouched for the caller to process. Returns true if
+ * @rsp was consumed here: queued on queue->rsp_wait_list while the queue is
+ * still connecting, or released via nvmet_rdma_put_rsp() in any other state.
+ */
+static bool nvmet_rdma_recv_not_live(struct nvmet_rdma_queue *queue,
+ struct nvmet_rdma_rsp *rsp)
+{
+ unsigned long flags;
+ bool ret = true;
+
+ spin_lock_irqsave(&queue->state_lock, flags);
+ /*
+ * Re-check the queue state now that state_lock is held: the queue may
+ * have gone live since the caller's earlier check (race with the
+ * RDMA_CM_EVENT_ESTABLISHED handler).
+ */
+ if (queue->state == NVMET_RDMA_Q_LIVE)
+ ret = false;
+ else if (queue->state == NVMET_RDMA_Q_CONNECTING)
+ list_add_tail(&rsp->wait_list, &queue->rsp_wait_list);
+ else
+ nvmet_rdma_put_rsp(rsp);
+ spin_unlock_irqrestore(&queue->state_lock, flags);
+ return ret;
+}
+
static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct nvmet_rdma_cmd *cmd =
@@ -1047,18 +1057,11 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
rsp->req.cmd = cmd->nvme_cmd;
rsp->req.port = queue->port;
rsp->n_rdma = 0;
+ rsp->invalidate_rkey = 0;
- if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) {
- unsigned long flags;
-
- spin_lock_irqsave(&queue->state_lock, flags);
- if (queue->state == NVMET_RDMA_Q_CONNECTING)
- list_add_tail(&rsp->wait_list, &queue->rsp_wait_list);
- else
- nvmet_rdma_put_rsp(rsp);
- spin_unlock_irqrestore(&queue->state_lock, flags);
+ if (unlikely(queue->state != NVMET_RDMA_Q_LIVE) &&
+ nvmet_rdma_recv_not_live(queue, rsp))
return;
- }
nvmet_rdma_handle_command(queue, rsp);
}
@@ -1457,8 +1460,6 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
INIT_LIST_HEAD(&queue->rsp_wait_list);
INIT_LIST_HEAD(&queue->rsp_wr_wait_list);
spin_lock_init(&queue->rsp_wr_wait_lock);
- INIT_LIST_HEAD(&queue->free_rsps);
- spin_lock_init(&queue->rsps_lock);
INIT_LIST_HEAD(&queue->queue_list);
queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL);
@@ -1816,18 +1817,14 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl)
{
- struct nvmet_rdma_queue *queue;
+ struct nvmet_rdma_queue *queue, *n;
-restart:
mutex_lock(&nvmet_rdma_queue_mutex);
- list_for_each_entry(queue, &nvmet_rdma_queue_list, queue_list) {
- if (queue->nvme_sq.ctrl == ctrl) {
- list_del_init(&queue->queue_list);
- mutex_unlock(&nvmet_rdma_queue_mutex);
-
- __nvmet_rdma_queue_disconnect(queue);
- goto restart;
- }
+ list_for_each_entry_safe(queue, n, &nvmet_rdma_queue_list, queue_list) {
+ if (queue->nvme_sq.ctrl != ctrl)
+ continue;
+ list_del_init(&queue->queue_list);
+ __nvmet_rdma_queue_disconnect(queue);
}
mutex_unlock(&nvmet_rdma_queue_mutex);
}
@@ -1956,6 +1953,14 @@ static int nvmet_rdma_add_port(struct nvmet_port *nport)
nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
}
+ if (nport->max_queue_size < 0) {
+ nport->max_queue_size = NVME_RDMA_DEFAULT_QUEUE_SIZE;
+ } else if (nport->max_queue_size > NVME_RDMA_MAX_QUEUE_SIZE) {
+ pr_warn("max_queue_size %u is too large, reducing to %u\n",
+ nport->max_queue_size, NVME_RDMA_MAX_QUEUE_SIZE);
+ nport->max_queue_size = NVME_RDMA_MAX_QUEUE_SIZE;
+ }
+
ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr,
nport->disc_addr.trsvcid, &port->addr);
if (ret) {
@@ -2006,6 +2011,17 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
}
}
+/*
+ * Format the RDMA CM destination address of @ctrl's first queue into @traddr.
+ *
+ * The queue is recovered from ctrl->sqs[0] via container_of() (presumably the
+ * admin queue -- confirm against the core's sqs[] layout), and the address
+ * printed is cm_id->route.addr.dst_addr of that queue. At most @traddr_len
+ * bytes are written; the snprintf() result is returned unchanged.
+ *
+ * NOTE(review): ctrl->sqs[0] is used without a NULL check; this assumes the
+ * op is only invoked once the first SQ has been installed -- confirm.
+ */
+static ssize_t nvmet_rdma_host_port_addr(struct nvmet_ctrl *ctrl,
+ char *traddr, size_t traddr_len)
+{
+ struct nvmet_sq *nvme_sq = ctrl->sqs[0];
+ struct nvmet_rdma_queue *queue =
+ container_of(nvme_sq, struct nvmet_rdma_queue, nvme_sq);
+
+ return snprintf(traddr, traddr_len, "%pISc",
+ (struct sockaddr *)&queue->cm_id->route.addr.dst_addr);
+}
+
static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
{
if (ctrl->pi_support)
@@ -2015,6 +2031,8 @@ static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
static u16 nvmet_rdma_get_max_queue_size(const struct nvmet_ctrl *ctrl)
{
+ if (ctrl->pi_support)
+ return NVME_RDMA_MAX_METADATA_QUEUE_SIZE;
return NVME_RDMA_MAX_QUEUE_SIZE;
}
@@ -2028,6 +2046,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.queue_response = nvmet_rdma_queue_response,
.delete_ctrl = nvmet_rdma_delete_ctrl,
.disc_traddr = nvmet_rdma_disc_port_addr,
+ .host_traddr = nvmet_rdma_host_port_addr,
.get_mdts = nvmet_rdma_get_mdts,
.get_max_queue_size = nvmet_rdma_get_max_queue_size,
};