diff options
Diffstat (limited to 'drivers/infiniband/ulp/rtrs/rtrs-srv.c')
| -rw-r--r-- | drivers/infiniband/ulp/rtrs/rtrs-srv.c | 1386 |
1 files changed, 777 insertions, 609 deletions
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 0d9241f5d9e6..9ecc6343455d 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -11,11 +11,12 @@ #define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt #include <linux/module.h> -#include <linux/mempool.h> #include "rtrs-srv.h" #include "rtrs-log.h" #include <rdma/ib_cm.h> +#include <rdma/ib_verbs.h> +#include "rtrs-srv-trace.h" MODULE_DESCRIPTION("RDMA Transport Server"); MODULE_LICENSE("GPL"); @@ -25,12 +26,14 @@ MODULE_LICENSE("GPL"); #define DEFAULT_SESS_QUEUE_DEPTH 512 #define MAX_HDR_SIZE PAGE_SIZE -/* We guarantee to serve 10 paths at least */ -#define CHUNK_POOL_SZ 10 - -static struct rtrs_rdma_dev_pd dev_pd; -static mempool_t *chunk_pool; -struct class *rtrs_dev_class; +static const struct rtrs_rdma_dev_pd_ops dev_pd_ops; +static struct rtrs_rdma_dev_pd dev_pd = { + .ops = &dev_pd_ops +}; +const struct class rtrs_dev_class = { + .name = "rtrs-server", +}; +static struct rtrs_srv_ib_ctx ib_ctx; static int __read_mostly max_chunk_size = DEFAULT_MAX_CHUNK_SIZE; static int __read_mostly sess_queue_depth = DEFAULT_SESS_QUEUE_DEPTH; @@ -60,79 +63,39 @@ static inline struct rtrs_srv_con *to_srv_con(struct rtrs_con *c) return container_of(c, struct rtrs_srv_con, c); } -static inline struct rtrs_srv_sess *to_srv_sess(struct rtrs_sess *s) -{ - return container_of(s, struct rtrs_srv_sess, s); -} - -static bool __rtrs_srv_change_state(struct rtrs_srv_sess *sess, - enum rtrs_srv_state new_state) +static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path, + enum rtrs_srv_state new_state) { enum rtrs_srv_state old_state; bool changed = false; + unsigned long flags; - lockdep_assert_held(&sess->state_lock); - old_state = sess->state; + spin_lock_irqsave(&srv_path->state_lock, flags); + old_state = srv_path->state; switch (new_state) { case RTRS_SRV_CONNECTED: - switch (old_state) { - case RTRS_SRV_CONNECTING: + if (old_state == RTRS_SRV_CONNECTING) changed = true; - fallthrough; - default: - break; - } break; case RTRS_SRV_CLOSING: - switch (old_state) { - case RTRS_SRV_CONNECTING: - case RTRS_SRV_CONNECTED: + if (old_state == RTRS_SRV_CONNECTING || + old_state == RTRS_SRV_CONNECTED) changed = true; - fallthrough; - default: - break; - } break; case RTRS_SRV_CLOSED: - switch (old_state) { - case RTRS_SRV_CLOSING: + if (old_state == RTRS_SRV_CLOSING) changed = true; - fallthrough; - default: - break; - } break; default: break; } if (changed) - sess->state = new_state; - - return changed; -} - -static bool rtrs_srv_change_state_get_old(struct rtrs_srv_sess *sess, - enum rtrs_srv_state new_state, - enum rtrs_srv_state *old_state) -{ - bool changed; - - spin_lock_irq(&sess->state_lock); - *old_state = sess->state; - changed = __rtrs_srv_change_state(sess, new_state); - spin_unlock_irq(&sess->state_lock); + srv_path->state = new_state; + spin_unlock_irqrestore(&srv_path->state_lock, flags); return changed; } -static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, - enum rtrs_srv_state new_state) -{ - enum rtrs_srv_state old_state; - - return rtrs_srv_change_state_get_old(sess, new_state, &old_state); -} - static void free_id(struct rtrs_srv_op *id) { if (!id) @@ -140,17 +103,16 @@ static void free_id(struct rtrs_srv_op *id) kfree(id); } -static void rtrs_srv_free_ops_ids(struct rtrs_srv_sess *sess) +static void rtrs_srv_free_ops_ids(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_sess *srv = srv_path->srv; int i; - WARN_ON(atomic_read(&sess->ids_inflight)); - if (sess->ops_ids) { + if (srv_path->ops_ids) { for (i = 0; i < srv->queue_depth; i++) - free_id(sess->ops_ids[i]); - kfree(sess->ops_ids); - sess->ops_ids = NULL; + free_id(srv_path->ops_ids[i]); + kfree(srv_path->ops_ids); + srv_path->ops_ids = NULL; } } @@ -160,15 +122,26 @@ static struct ib_cqe io_comp_cqe = { .done = rtrs_srv_rdma_done }; -static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_sess *sess) +static inline void rtrs_srv_inflight_ref_release(struct percpu_ref *ref) { - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_path *srv_path = container_of(ref, + struct rtrs_srv_path, + ids_inflight_ref); + + percpu_ref_exit(&srv_path->ids_inflight_ref); + complete(&srv_path->complete_done); +} + +static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_path *srv_path) +{ + struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_srv_op *id; - int i; + int i, ret; - sess->ops_ids = kcalloc(srv->queue_depth, sizeof(*sess->ops_ids), - GFP_KERNEL); - if (!sess->ops_ids) + srv_path->ops_ids = kcalloc(srv->queue_depth, + sizeof(*srv_path->ops_ids), + GFP_KERNEL); + if (!srv_path->ops_ids) goto err; for (i = 0; i < srv->queue_depth; ++i) { @@ -176,45 +149,44 @@ static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_sess *sess) if (!id) goto err; - sess->ops_ids[i] = id; + srv_path->ops_ids[i] = id; + } + + ret = percpu_ref_init(&srv_path->ids_inflight_ref, + rtrs_srv_inflight_ref_release, 0, GFP_KERNEL); + if (ret) { + pr_err("Percpu reference init failed\n"); + goto err; } - init_waitqueue_head(&sess->ids_waitq); - atomic_set(&sess->ids_inflight, 0); + init_completion(&srv_path->complete_done); return 0; err: - rtrs_srv_free_ops_ids(sess); + rtrs_srv_free_ops_ids(srv_path); return -ENOMEM; } -static inline void rtrs_srv_get_ops_ids(struct rtrs_srv_sess *sess) -{ - atomic_inc(&sess->ids_inflight); -} - -static inline void rtrs_srv_put_ops_ids(struct rtrs_srv_sess *sess) +static inline void rtrs_srv_get_ops_ids(struct rtrs_srv_path *srv_path) { - if (atomic_dec_and_test(&sess->ids_inflight)) - wake_up(&sess->ids_waitq); + percpu_ref_get(&srv_path->ids_inflight_ref); } -static void rtrs_srv_wait_ops_ids(struct rtrs_srv_sess *sess) +static inline void rtrs_srv_put_ops_ids(struct rtrs_srv_path *srv_path) { - wait_event(sess->ids_waitq, !atomic_read(&sess->ids_inflight)); + percpu_ref_put(&srv_path->ids_inflight_ref); } - static void rtrs_srv_reg_mr_done(struct ib_cq *cq, struct ib_wc *wc) { - struct rtrs_srv_con *con = cq->cq_context; - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); - if (unlikely(wc->status != IB_WC_SUCCESS)) { + if (wc->status != IB_WC_SUCCESS) { rtrs_err(s, "REG MR failed: %s\n", ib_wc_status_msg(wc->status)); - close_sess(sess); + close_path(srv_path); return; } } @@ -225,12 +197,12 @@ static struct ib_cqe local_reg_cqe = { static int rdma_write_sg(struct rtrs_srv_op *id) { - struct rtrs_sess *s = id->con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); - dma_addr_t dma_addr = sess->dma_addr[id->msg_id]; + struct rtrs_path *s = id->con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); + dma_addr_t dma_addr = srv_path->dma_addr[id->msg_id]; struct rtrs_srv_mr *srv_mr; - struct rtrs_srv *srv = sess->srv; - struct ib_send_wr inv_wr, imm_wr; + struct ib_send_wr inv_wr; + struct ib_rdma_wr imm_wr; struct ib_rdma_wr *wr = NULL; enum ib_send_flags flags; size_t sg_cnt; @@ -243,7 +215,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id) sg_cnt = le16_to_cpu(id->rd_msg->sg_cnt); need_inval = le16_to_cpu(id->rd_msg->flags) & RTRS_MSG_NEED_INVAL_F; - if (unlikely(sg_cnt != 1)) + if (sg_cnt != 1) return -EINVAL; offset = 0; @@ -256,12 +228,12 @@ static int rdma_write_sg(struct rtrs_srv_op *id) /* WR will fail with length error * if this is 0 */ - if (unlikely(plist->length == 0)) { + if (plist->length == 0) { rtrs_err(s, "Invalid RDMA-Write sg list length 0\n"); return -EINVAL; } - plist->lkey = sess->s.dev->ib_pd->local_dma_lkey; + plist->lkey = srv_path->s.dev->ib_pd->local_dma_lkey; offset += plist->length; wr->wr.sg_list = plist; @@ -275,43 +247,46 @@ static int rdma_write_sg(struct rtrs_srv_op *id) WARN_ON_ONCE(rkey != wr->rkey); wr->wr.opcode = IB_WR_RDMA_WRITE; + wr->wr.wr_cqe = &io_comp_cqe; wr->wr.ex.imm_data = 0; wr->wr.send_flags = 0; if (need_inval && always_invalidate) { wr->wr.next = &rwr.wr; rwr.wr.next = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else if (always_invalidate) { wr->wr.next = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (need_inval) { wr->wr.next = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else { - wr->wr.next = &imm_wr; + wr->wr.next = &imm_wr.wr; } /* * From time to time we have to post signaled sends, * or send queue will fill up and only QP reset can help. */ - flags = (atomic_inc_return(&id->con->wr_cnt) % srv->queue_depth) ? + flags = (atomic_inc_return(&id->con->c.wr_cnt) % s->signal_interval) ? 0 : IB_SEND_SIGNALED; if (need_inval) { inv_wr.sg_list = NULL; inv_wr.num_sge = 0; inv_wr.opcode = IB_WR_SEND_WITH_INV; + inv_wr.wr_cqe = &io_comp_cqe; inv_wr.send_flags = 0; inv_wr.ex.invalidate_rkey = rkey; } - imm_wr.next = NULL; + imm_wr.wr.next = NULL; if (always_invalidate) { struct rtrs_msg_rkey_rsp *msg; - srv_mr = &sess->mrs[id->msg_id]; + srv_mr = &srv_path->mrs[id->msg_id]; rwr.wr.opcode = IB_WR_REG_MR; + rwr.wr.wr_cqe = &local_reg_cqe; rwr.wr.num_sge = 0; rwr.mr = srv_mr->mr; rwr.wr.send_flags = 0; @@ -325,28 +300,28 @@ static int rdma_write_sg(struct rtrs_srv_op *id) list.addr = srv_mr->iu->dma_addr; list.length = sizeof(*msg); - list.lkey = sess->s.dev->ib_pd->local_dma_lkey; - imm_wr.sg_list = &list; - imm_wr.num_sge = 1; - imm_wr.opcode = IB_WR_SEND_WITH_IMM; - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, + list.lkey = srv_path->s.dev->ib_pd->local_dma_lkey; + imm_wr.wr.sg_list = &list; + imm_wr.wr.num_sge = 1; + imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM; + ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev, srv_mr->iu->dma_addr, srv_mr->iu->size, DMA_TO_DEVICE); } else { - imm_wr.sg_list = NULL; - imm_wr.num_sge = 0; - imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; + imm_wr.wr.sg_list = NULL; + imm_wr.wr.num_sge = 0; + imm_wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; } - imm_wr.send_flags = flags; - imm_wr.ex.imm_data = cpu_to_be32(rtrs_to_io_rsp_imm(id->msg_id, + imm_wr.wr.send_flags = flags; + imm_wr.wr.ex.imm_data = cpu_to_be32(rtrs_to_io_rsp_imm(id->msg_id, 0, need_inval)); - imm_wr.wr_cqe = &io_comp_cqe; - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, dma_addr, + imm_wr.wr.wr_cqe = &io_comp_cqe; + ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev, dma_addr, offset, DMA_BIDIRECTIONAL); err = ib_post_send(id->con->c.qp, &id->tx_wr.wr, NULL); - if (unlikely(err)) + if (err) rtrs_err(s, "Posting RDMA-Write-Request to QP failed, err: %d\n", err); @@ -366,14 +341,15 @@ static int rdma_write_sg(struct rtrs_srv_op *id) static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, int errno) { - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); - struct ib_send_wr inv_wr, imm_wr, *wr = NULL; + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); + struct ib_send_wr inv_wr, *wr = NULL; + struct ib_rdma_wr imm_wr; struct ib_reg_wr rwr; - struct rtrs_srv *srv = sess->srv; struct rtrs_srv_mr *srv_mr; bool need_inval = false; enum ib_send_flags flags; + struct ib_sge list; u32 imm; int err; @@ -386,7 +362,8 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, sg_cnt = le16_to_cpu(rd_msg->sg_cnt); if (need_inval) { - if (likely(sg_cnt)) { + if (sg_cnt) { + inv_wr.wr_cqe = &io_comp_cqe; inv_wr.sg_list = NULL; inv_wr.num_sge = 0; inv_wr.opcode = IB_WR_SEND_WITH_INV; @@ -401,34 +378,36 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, } } + trace_send_io_resp_imm(id, need_inval, always_invalidate, errno); + if (need_inval && always_invalidate) { wr = &inv_wr; inv_wr.next = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (always_invalidate) { wr = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (need_inval) { wr = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else { - wr = &imm_wr; + wr = &imm_wr.wr; } /* * From time to time we have to post signalled sends, * or send queue will fill up and only QP reset can help. */ - flags = (atomic_inc_return(&con->wr_cnt) % srv->queue_depth) ? + flags = (atomic_inc_return(&con->c.wr_cnt) % s->signal_interval) ? 0 : IB_SEND_SIGNALED; imm = rtrs_to_io_rsp_imm(id->msg_id, errno, need_inval); - imm_wr.next = NULL; + imm_wr.wr.next = NULL; if (always_invalidate) { - struct ib_sge list; struct rtrs_msg_rkey_rsp *msg; - srv_mr = &sess->mrs[id->msg_id]; - rwr.wr.next = &imm_wr; + srv_mr = &srv_path->mrs[id->msg_id]; + rwr.wr.next = &imm_wr.wr; rwr.wr.opcode = IB_WR_REG_MR; + rwr.wr.wr_cqe = &local_reg_cqe; rwr.wr.num_sge = 0; rwr.wr.send_flags = 0; rwr.mr = srv_mr->mr; @@ -442,39 +421,36 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, list.addr = srv_mr->iu->dma_addr; list.length = sizeof(*msg); - list.lkey = sess->s.dev->ib_pd->local_dma_lkey; - imm_wr.sg_list = &list; - imm_wr.num_sge = 1; - imm_wr.opcode = IB_WR_SEND_WITH_IMM; - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, + list.lkey = srv_path->s.dev->ib_pd->local_dma_lkey; + imm_wr.wr.sg_list = &list; + imm_wr.wr.num_sge = 1; + imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM; + ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev, srv_mr->iu->dma_addr, srv_mr->iu->size, DMA_TO_DEVICE); } else { - imm_wr.sg_list = NULL; - imm_wr.num_sge = 0; - imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; + imm_wr.wr.sg_list = NULL; + imm_wr.wr.num_sge = 0; + imm_wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; } - imm_wr.send_flags = flags; - imm_wr.wr_cqe = &io_comp_cqe; + imm_wr.wr.send_flags = flags; + imm_wr.wr.wr_cqe = &io_comp_cqe; - imm_wr.ex.imm_data = cpu_to_be32(imm); + imm_wr.wr.ex.imm_data = cpu_to_be32(imm); err = ib_post_send(id->con->c.qp, wr, NULL); - if (unlikely(err)) + if (err) rtrs_err_rl(s, "Posting RDMA-Reply to QP failed, err: %d\n", err); return err; } -void close_sess(struct rtrs_srv_sess *sess) +void close_path(struct rtrs_srv_path *srv_path) { - enum rtrs_srv_state old_state; - - if (rtrs_srv_change_state_get_old(sess, RTRS_SRV_CLOSING, - &old_state)) - queue_work(rtrs_wq, &sess->close_work); - WARN_ON(sess->state != RTRS_SRV_CLOSING); + if (rtrs_srv_change_state(srv_path, RTRS_SRV_CLOSING)) + queue_work(rtrs_wq, &srv_path->close_work); + WARN_ON(srv_path->state != RTRS_SRV_CLOSING); } static inline const char *rtrs_srv_state_str(enum rtrs_srv_state state) @@ -506,35 +482,37 @@ static inline const char *rtrs_srv_state_str(enum rtrs_srv_state state) */ bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int status) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; struct rtrs_srv_con *con; - struct rtrs_sess *s; + struct rtrs_path *s; int err; if (WARN_ON(!id)) return true; con = id->con; - s = con->c.sess; - sess = to_srv_sess(s); + s = con->c.path; + srv_path = to_srv_path(s); id->status = status; - if (unlikely(sess->state != RTRS_SRV_CONNECTED)) { + if (srv_path->state != RTRS_SRV_CONNECTED) { rtrs_err_rl(s, - "Sending I/O response failed, session is disconnected, sess state %s\n", - rtrs_srv_state_str(sess->state)); + "Sending I/O response failed, server path %s is disconnected, path state %s\n", + kobject_name(&srv_path->kobj), + rtrs_srv_state_str(srv_path->state)); goto out; } if (always_invalidate) { - struct rtrs_srv_mr *mr = &sess->mrs[id->msg_id]; + struct rtrs_srv_mr *mr = &srv_path->mrs[id->msg_id]; ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey)); } - if (unlikely(atomic_sub_return(1, - &con->sq_wr_avail) < 0)) { - pr_err("IB send queue full\n"); - atomic_add(1, &con->sq_wr_avail); + if (atomic_sub_return(1, &con->c.sq_wr_avail) < 0) { + rtrs_err(s, "IB send queue full: srv_path=%s cid=%d\n", + kobject_name(&srv_path->kobj), + con->c.cid); + atomic_add(1, &con->c.sq_wr_avail); spin_lock(&con->rsp_wr_wait_lock); list_add_tail(&id->wait_list, &con->rsp_wr_wait_list); spin_unlock(&con->rsp_wr_wait_lock); @@ -546,12 +524,13 @@ bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int status) else err = rdma_write_sg(id); - if (unlikely(err)) { - rtrs_err_rl(s, "IO response failed: %d\n", err); - close_sess(sess); + if (err) { + rtrs_err_rl(s, "IO response failed: %d: srv_path=%s\n", err, + kobject_name(&srv_path->kobj)); + close_path(srv_path); } out: - rtrs_srv_put_ops_ids(sess); + rtrs_srv_put_ops_ids(srv_path); return true; } EXPORT_SYMBOL(rtrs_srv_resp_rdma); @@ -561,37 +540,41 @@ EXPORT_SYMBOL(rtrs_srv_resp_rdma); * @srv: Session pointer * @priv: The private pointer that is associated with the session. */ -void rtrs_srv_set_sess_priv(struct rtrs_srv *srv, void *priv) +void rtrs_srv_set_sess_priv(struct rtrs_srv_sess *srv, void *priv) { srv->priv = priv; } EXPORT_SYMBOL(rtrs_srv_set_sess_priv); -static void unmap_cont_bufs(struct rtrs_srv_sess *sess) +static void unmap_cont_bufs(struct rtrs_srv_path *srv_path) { int i; - for (i = 0; i < sess->mrs_num; i++) { + for (i = 0; i < srv_path->mrs_num; i++) { struct rtrs_srv_mr *srv_mr; - srv_mr = &sess->mrs[i]; - rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE, - sess->s.dev->ib_dev, 1); + srv_mr = &srv_path->mrs[i]; + + if (always_invalidate) + rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1); + ib_dereg_mr(srv_mr->mr); - ib_dma_unmap_sg(sess->s.dev->ib_dev, srv_mr->sgt.sgl, + ib_dma_unmap_sg(srv_path->s.dev->ib_dev, srv_mr->sgt.sgl, srv_mr->sgt.nents, DMA_BIDIRECTIONAL); sg_free_table(&srv_mr->sgt); } - kfree(sess->mrs); + kfree(srv_path->mrs); } -static int map_cont_bufs(struct rtrs_srv_sess *sess) +static int map_cont_bufs(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; - struct rtrs_sess *ss = &sess->s; - int i, mri, err, mrs_num; + struct rtrs_srv_sess *srv = srv_path->srv; + struct rtrs_path *ss = &srv_path->s; + int i, err, mrs_num; unsigned int chunk_bits; int chunks_per_mr = 1; + struct ib_mr *mr; + struct sg_table *sgt; /* * Here we map queue_depth chunks to MR. Firstly we have to @@ -605,25 +588,23 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) mrs_num = srv->queue_depth; } else { chunks_per_mr = - sess->s.dev->ib_dev->attrs.max_fast_reg_page_list_len; + srv_path->s.dev->ib_dev->attrs.max_fast_reg_page_list_len; mrs_num = DIV_ROUND_UP(srv->queue_depth, chunks_per_mr); chunks_per_mr = DIV_ROUND_UP(srv->queue_depth, mrs_num); } - sess->mrs = kcalloc(mrs_num, sizeof(*sess->mrs), GFP_KERNEL); - if (!sess->mrs) + srv_path->mrs = kcalloc(mrs_num, sizeof(*srv_path->mrs), GFP_KERNEL); + if (!srv_path->mrs) return -ENOMEM; - sess->mrs_num = mrs_num; - - for (mri = 0; mri < mrs_num; mri++) { - struct rtrs_srv_mr *srv_mr = &sess->mrs[mri]; - struct sg_table *sgt = &srv_mr->sgt; + for (srv_path->mrs_num = 0; srv_path->mrs_num < mrs_num; + srv_path->mrs_num++) { + struct rtrs_srv_mr *srv_mr = &srv_path->mrs[srv_path->mrs_num]; struct scatterlist *s; - struct ib_mr *mr; - int nr, chunks; + int nr, nr_sgt, chunks; - chunks = chunks_per_mr * mri; + sgt = &srv_mr->sgt; + chunks = chunks_per_mr * srv_path->mrs_num; if (!always_invalidate) chunks_per_mr = min_t(int, chunks_per_mr, srv->queue_depth - chunks); @@ -636,21 +617,21 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) sg_set_page(s, srv->chunks[chunks + i], max_chunk_size, 0); - nr = ib_dma_map_sg(sess->s.dev->ib_dev, sgt->sgl, + nr_sgt = ib_dma_map_sg(srv_path->s.dev->ib_dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL); - if (nr < sgt->nents) { - err = nr < 0 ? nr : -EINVAL; + if (!nr_sgt) { + err = -EINVAL; goto free_sg; } - mr = ib_alloc_mr(sess->s.dev->ib_pd, IB_MR_TYPE_MEM_REG, - sgt->nents); + mr = ib_alloc_mr(srv_path->s.dev->ib_pd, IB_MR_TYPE_MEM_REG, + nr_sgt); if (IS_ERR(mr)) { err = PTR_ERR(mr); goto unmap_sg; } - nr = ib_map_mr_sg(mr, sgt->sgl, sgt->nents, + nr = ib_map_mr_sg(mr, sgt->sgl, nr_sgt, NULL, max_chunk_size); - if (nr < 0 || nr < sgt->nents) { + if (nr != nr_sgt) { err = nr < 0 ? nr : -EINVAL; goto dereg_mr; } @@ -658,117 +639,115 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) if (always_invalidate) { srv_mr->iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_rkey_rsp), - GFP_KERNEL, sess->s.dev->ib_dev, + GFP_KERNEL, srv_path->s.dev->ib_dev, DMA_TO_DEVICE, rtrs_srv_rdma_done); if (!srv_mr->iu) { err = -ENOMEM; rtrs_err(ss, "rtrs_iu_alloc(), err: %d\n", err); - goto free_iu; + goto dereg_mr; } } /* Eventually dma addr for each chunk can be cached */ - for_each_sg(sgt->sgl, s, sgt->orig_nents, i) - sess->dma_addr[chunks + i] = sg_dma_address(s); + for_each_sg(sgt->sgl, s, nr_sgt, i) + srv_path->dma_addr[chunks + i] = sg_dma_address(s); ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); srv_mr->mr = mr; - - continue; -err: - while (mri--) { - srv_mr = &sess->mrs[mri]; - sgt = &srv_mr->sgt; - mr = srv_mr->mr; -free_iu: - rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE, - sess->s.dev->ib_dev, 1); -dereg_mr: - ib_dereg_mr(mr); -unmap_sg: - ib_dma_unmap_sg(sess->s.dev->ib_dev, sgt->sgl, - sgt->nents, DMA_BIDIRECTIONAL); -free_sg: - sg_free_table(sgt); - } - kfree(sess->mrs); - - return err; } chunk_bits = ilog2(srv->queue_depth - 1) + 1; - sess->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits); + srv_path->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits); return 0; + +dereg_mr: + ib_dereg_mr(mr); +unmap_sg: + ib_dma_unmap_sg(srv_path->s.dev->ib_dev, sgt->sgl, + sgt->nents, DMA_BIDIRECTIONAL); +free_sg: + sg_free_table(sgt); +err: + unmap_cont_bufs(srv_path); + + return err; } static void rtrs_srv_hb_err_handler(struct rtrs_con *c) { - close_sess(to_srv_sess(c->sess)); + struct rtrs_srv_con *con = container_of(c, typeof(*con), c); + struct rtrs_srv_path *srv_path = to_srv_path(con->c.path); + + rtrs_err(con->c.path, "HB err handler for path=%s\n", kobject_name(&srv_path->kobj)); + close_path(to_srv_path(c->path)); } -static void rtrs_srv_init_hb(struct rtrs_srv_sess *sess) +static void rtrs_srv_init_hb(struct rtrs_srv_path *srv_path) { - rtrs_init_hb(&sess->s, &io_comp_cqe, + rtrs_init_hb(&srv_path->s, &io_comp_cqe, RTRS_HB_INTERVAL_MS, RTRS_HB_MISSED_MAX, rtrs_srv_hb_err_handler, rtrs_wq); } -static void rtrs_srv_start_hb(struct rtrs_srv_sess *sess) +static void rtrs_srv_start_hb(struct rtrs_srv_path *srv_path) { - rtrs_start_hb(&sess->s); + rtrs_start_hb(&srv_path->s); } -static void rtrs_srv_stop_hb(struct rtrs_srv_sess *sess) +static void rtrs_srv_stop_hb(struct rtrs_srv_path *srv_path) { - rtrs_stop_hb(&sess->s); + rtrs_stop_hb(&srv_path->s); } static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) { - struct rtrs_srv_con *con = cq->cq_context; - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); struct rtrs_iu *iu; iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); - rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, srv_path->s.dev->ib_dev, 1); - if (unlikely(wc->status != IB_WC_SUCCESS)) { + if (wc->status != IB_WC_SUCCESS) { rtrs_err(s, "Sess info response send failed: %s\n", ib_wc_status_msg(wc->status)); - close_sess(sess); + close_path(srv_path); return; } WARN_ON(wc->opcode != IB_WC_SEND); } -static void rtrs_srv_sess_up(struct rtrs_srv_sess *sess) +static int rtrs_srv_path_up(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_srv_ctx *ctx = srv->ctx; - int up; + int up, ret = 0; mutex_lock(&srv->paths_ev_mutex); up = ++srv->paths_up; if (up == 1) - ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL); + ret = ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL); mutex_unlock(&srv->paths_ev_mutex); /* Mark session as established */ - sess->established = true; + if (!ret) + srv_path->established = true; + + return ret; } -static void rtrs_srv_sess_down(struct rtrs_srv_sess *sess) +static void rtrs_srv_path_down(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_srv_ctx *ctx = srv->ctx; - if (!sess->established) + if (!srv_path->established) return; - sess->established = false; + srv_path->established = false; mutex_lock(&srv->paths_ev_mutex); WARN_ON(!srv->paths_up); if (--srv->paths_up == 0) @@ -776,13 +755,46 @@ static void rtrs_srv_sess_down(struct rtrs_srv_sess *sess) mutex_unlock(&srv->paths_ev_mutex); } -static int post_recv_sess(struct rtrs_srv_sess *sess); +static bool exist_pathname(struct rtrs_srv_ctx *ctx, + const char *pathname, const uuid_t *path_uuid) +{ + struct rtrs_srv_sess *srv; + struct rtrs_srv_path *srv_path; + bool found = false; + + mutex_lock(&ctx->srv_mutex); + list_for_each_entry(srv, &ctx->srv_list, ctx_list) { + mutex_lock(&srv->paths_mutex); + + /* when a client with same uuid and same sessname tried to add a path */ + if (uuid_equal(&srv->paths_uuid, path_uuid)) { + mutex_unlock(&srv->paths_mutex); + continue; + } + + list_for_each_entry(srv_path, &srv->paths_list, s.entry) { + if (strlen(srv_path->s.sessname) == strlen(pathname) && + !strcmp(srv_path->s.sessname, pathname)) { + found = true; + break; + } + } + mutex_unlock(&srv->paths_mutex); + if (found) + break; + } + mutex_unlock(&ctx->srv_mutex); + return found; +} + +static int post_recv_path(struct rtrs_srv_path *srv_path); +static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno); static int process_info_req(struct rtrs_srv_con *con, struct rtrs_msg_info_req *msg) { - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); struct ib_send_wr *reg_wr = NULL; struct rtrs_msg_info_rsp *rsp; struct rtrs_iu *tx_iu; @@ -790,31 +802,44 @@ static int process_info_req(struct rtrs_srv_con *con, int mri, err; size_t tx_sz; - err = post_recv_sess(sess); - if (unlikely(err)) { - rtrs_err(s, "post_recv_sess(), err: %d\n", err); + err = post_recv_path(srv_path); + if (err) { + rtrs_err(s, "post_recv_path(), err: %d\n", err); return err; } - rwr = kcalloc(sess->mrs_num, sizeof(*rwr), GFP_KERNEL); - if (unlikely(!rwr)) + + if (strchr(msg->pathname, '/') || strchr(msg->pathname, '.')) { + rtrs_err(s, "pathname cannot contain / and .\n"); + return -EINVAL; + } + + if (exist_pathname(srv_path->srv->ctx, + msg->pathname, &srv_path->srv->paths_uuid)) { + rtrs_err(s, "pathname is duplicated: %s\n", msg->pathname); + return -EPERM; + } + strscpy(srv_path->s.sessname, msg->pathname, + sizeof(srv_path->s.sessname)); + + rwr = kcalloc(srv_path->mrs_num, sizeof(*rwr), GFP_KERNEL); + if (!rwr) return -ENOMEM; - strlcpy(sess->s.sessname, msg->sessname, sizeof(sess->s.sessname)); tx_sz = sizeof(*rsp); - tx_sz += sizeof(rsp->desc[0]) * sess->mrs_num; - tx_iu = rtrs_iu_alloc(1, tx_sz, GFP_KERNEL, sess->s.dev->ib_dev, + tx_sz += sizeof(rsp->desc[0]) * srv_path->mrs_num; + tx_iu = rtrs_iu_alloc(1, tx_sz, GFP_KERNEL, srv_path->s.dev->ib_dev, DMA_TO_DEVICE, rtrs_srv_info_rsp_done); - if (unlikely(!tx_iu)) { + if (!tx_iu) { err = -ENOMEM; goto rwr_free; } rsp = tx_iu->buf; rsp->type = cpu_to_le16(RTRS_MSG_INFO_RSP); - rsp->sg_cnt = cpu_to_le16(sess->mrs_num); + rsp->sg_cnt = cpu_to_le16(srv_path->mrs_num); - for (mri = 0; mri < sess->mrs_num; mri++) { - struct ib_mr *mr = sess->mrs[mri].mr; + for (mri = 0; mri < srv_path->mrs_num; mri++) { + struct ib_mr *mr = srv_path->mrs[mri].mr; rsp->desc[mri].addr = cpu_to_le64(mr->iova); rsp->desc[mri].key = cpu_to_le32(mr->rkey); @@ -827,7 +852,7 @@ static int process_info_req(struct rtrs_srv_con *con, rwr[mri].wr.opcode = IB_WR_REG_MR; rwr[mri].wr.wr_cqe = &local_reg_cqe; rwr[mri].wr.num_sge = 0; - rwr[mri].wr.send_flags = mri ? 0 : IB_SEND_SIGNALED; + rwr[mri].wr.send_flags = 0; rwr[mri].mr = mr; rwr[mri].key = mr->rkey; rwr[mri].access = (IB_ACCESS_LOCAL_WRITE | @@ -835,13 +860,18 @@ static int process_info_req(struct rtrs_srv_con *con, reg_wr = &rwr[mri].wr; } - err = rtrs_srv_create_sess_files(sess); - if (unlikely(err)) + err = rtrs_srv_create_path_files(srv_path); + if (err) + goto iu_free; + kobject_get(&srv_path->kobj); + get_device(&srv_path->srv->dev); + err = rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED); + if (!err) { + rtrs_err(s, "rtrs_srv_change_state(), err: %d\n", err); goto iu_free; - kobject_get(&sess->kobj); - get_device(&sess->srv->dev); - rtrs_srv_change_state(sess, RTRS_SRV_CONNECTED); - rtrs_srv_start_hb(sess); + } + + rtrs_srv_start_hb(srv_path); /* * We do not account number of established connections at the current @@ -849,17 +879,22 @@ static int process_info_req(struct rtrs_srv_con *con, * all connections are successfully established. Thus, simply notify * listener with a proper event if we are the first path. */ - rtrs_srv_sess_up(sess); + err = rtrs_srv_path_up(srv_path); + if (err) { + rtrs_err(s, "rtrs_srv_path_up(), err: %d\n", err); + goto iu_free; + } - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, tx_iu->dma_addr, + ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev, + tx_iu->dma_addr, tx_iu->size, DMA_TO_DEVICE); /* Send info response */ err = rtrs_iu_post_send(&con->c, tx_iu, tx_sz, reg_wr); - if (unlikely(err)) { + if (err) { rtrs_err(s, "rtrs_iu_post_send(), err: %d\n", err); iu_free: - rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(tx_iu, srv_path->s.dev->ib_dev, 1); } rwr_free: kfree(rwr); @@ -869,9 +904,9 @@ rwr_free: static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc) { - struct rtrs_srv_con *con = cq->cq_context; - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); struct rtrs_msg_info_req *msg; struct rtrs_iu *iu; int err; @@ -879,55 +914,54 @@ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON(con->c.cid); iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); - if (unlikely(wc->status != IB_WC_SUCCESS)) { + if (wc->status != IB_WC_SUCCESS) { rtrs_err(s, "Sess info request receive failed: %s\n", ib_wc_status_msg(wc->status)); goto close; } WARN_ON(wc->opcode != IB_WC_RECV); - if (unlikely(wc->byte_len < sizeof(*msg))) { + if (wc->byte_len < sizeof(*msg)) { rtrs_err(s, "Sess info request is malformed: size %d\n", wc->byte_len); goto close; } - ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr, + ib_dma_sync_single_for_cpu(srv_path->s.dev->ib_dev, iu->dma_addr, iu->size, DMA_FROM_DEVICE); msg = iu->buf; - if (unlikely(le16_to_cpu(msg->type) != RTRS_MSG_INFO_REQ)) { + if (le16_to_cpu(msg->type) != RTRS_MSG_INFO_REQ) { rtrs_err(s, "Sess info request is malformed: type %d\n", le16_to_cpu(msg->type)); goto close; } err = process_info_req(con, msg); - if (unlikely(err)) + if (err) goto close; -out: - rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, srv_path->s.dev->ib_dev, 1); return; close: - close_sess(sess); - goto out; + rtrs_iu_free(iu, srv_path->s.dev->ib_dev, 1); + close_path(srv_path); } static int post_recv_info_req(struct rtrs_srv_con *con) { - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); struct rtrs_iu *rx_iu; int err; rx_iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req), - GFP_KERNEL, sess->s.dev->ib_dev, + GFP_KERNEL, srv_path->s.dev->ib_dev, DMA_FROM_DEVICE, rtrs_srv_info_req_done); - if (unlikely(!rx_iu)) + if (!rx_iu) return -ENOMEM; /* Prepare for getting info response */ err = rtrs_iu_post_recv(&con->c, rx_iu); - if (unlikely(err)) { + if (err) { rtrs_err(s, "rtrs_iu_post_recv(), err: %d\n", err); - rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(rx_iu, srv_path->s.dev->ib_dev, 1); return err; } @@ -940,28 +974,38 @@ static int post_recv_io(struct rtrs_srv_con *con, size_t q_size) for (i = 0; i < q_size; i++) { err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); - if (unlikely(err)) + if (err) return err; } return 0; } -static int post_recv_sess(struct rtrs_srv_sess *sess) +static int post_recv_path(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; - struct rtrs_sess *s = &sess->s; + struct rtrs_srv_sess *srv = srv_path->srv; + struct rtrs_path *s = &srv_path->s; size_t q_size; int err, cid; - for (cid = 0; cid < sess->s.con_num; cid++) { + for (cid = 0; cid < srv_path->s.con_num; cid++) { if (cid == 0) q_size = SERVICE_CON_QUEUE_DEPTH; else q_size = srv->queue_depth; + if (srv_path->state != RTRS_SRV_CONNECTING) { + rtrs_err(s, "Path state invalid. state %s\n", + rtrs_srv_state_str(srv_path->state)); + return -EIO; + } - err = post_recv_io(to_srv_con(sess->s.con[cid]), q_size); - if (unlikely(err)) { + if (!srv_path->s.con[cid]) { + rtrs_err(s, "Conn not set for %d\n", cid); + return -EIO; + } + + err = post_recv_io(to_srv_con(srv_path->s.con[cid]), q_size); + if (err) { rtrs_err(s, "post_recv_io(), err: %d\n", err); return err; } @@ -974,9 +1018,9 @@ static void process_read(struct rtrs_srv_con *con, struct rtrs_msg_rdma_read *msg, u32 buf_id, u32 off) { - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); - struct rtrs_srv *srv = sess->srv; + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); + struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_srv_ctx *ctx = srv->ctx; struct rtrs_srv_op *id; @@ -984,20 +1028,20 @@ static void process_read(struct rtrs_srv_con *con, void *data; int ret; - if (unlikely(sess->state != RTRS_SRV_CONNECTED)) { + if (srv_path->state != RTRS_SRV_CONNECTED) { rtrs_err_rl(s, "Processing read request failed, session is disconnected, sess state %s\n", - rtrs_srv_state_str(sess->state)); + rtrs_srv_state_str(srv_path->state)); return; } - if (unlikely(msg->sg_cnt != 1 && msg->sg_cnt != 0)) { + if (msg->sg_cnt != 1 && msg->sg_cnt != 0) { rtrs_err_rl(s, "Processing read request failed, invalid message\n"); return; } - rtrs_srv_get_ops_ids(sess); - rtrs_srv_update_rdma_stats(sess->stats, off, READ); - id = sess->ops_ids[buf_id]; + rtrs_srv_get_ops_ids(srv_path); + rtrs_srv_update_rdma_stats(srv_path->stats, off, READ); + id = srv_path->ops_ids[buf_id]; id->con = con; id->dir = READ; id->msg_id = buf_id; @@ -1005,10 +1049,10 @@ static void process_read(struct rtrs_srv_con *con, usr_len = le16_to_cpu(msg->usr_len); data_len = off - usr_len; data = page_address(srv->chunks[buf_id]); - ret = ctx->ops.rdma_ev(srv, srv->priv, id, READ, data, data_len, + ret = ctx->ops.rdma_ev(srv->priv, id, data, data_len, data + data_len, usr_len); - if (unlikely(ret)) { + if (ret) { rtrs_err_rl(s, "Processing read request failed, user module cb reported for msg_id %d, err: %d\n", buf_id, ret); @@ -1023,18 +1067,18 @@ send_err_msg: rtrs_err_rl(s, "Sending err msg for failed RDMA-Write-Req failed, msg_id %d, err: %d\n", buf_id, ret); - close_sess(sess); + close_path(srv_path); } - rtrs_srv_put_ops_ids(sess); + rtrs_srv_put_ops_ids(srv_path); } static void process_write(struct rtrs_srv_con *con, struct rtrs_msg_rdma_write *req, u32 buf_id, u32 off) { - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); - struct rtrs_srv *srv = sess->srv; + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); + struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_srv_ctx *ctx = srv->ctx; struct rtrs_srv_op *id; @@ -1042,15 +1086,15 @@ static void process_write(struct rtrs_srv_con *con, void *data; int ret; - if (unlikely(sess->state != RTRS_SRV_CONNECTED)) { + if (srv_path->state != RTRS_SRV_CONNECTED) { rtrs_err_rl(s, "Processing write request failed, session is disconnected, sess state %s\n", - rtrs_srv_state_str(sess->state)); + rtrs_srv_state_str(srv_path->state)); return; } - rtrs_srv_get_ops_ids(sess); - rtrs_srv_update_rdma_stats(sess->stats, off, WRITE); - id = sess->ops_ids[buf_id]; + rtrs_srv_get_ops_ids(srv_path); + rtrs_srv_update_rdma_stats(srv_path->stats, off, WRITE); + id = srv_path->ops_ids[buf_id]; id->con = con; id->dir = WRITE; id->msg_id = buf_id; @@ -1058,9 +1102,9 @@ static void process_write(struct rtrs_srv_con *con, usr_len = le16_to_cpu(req->usr_len); data_len = off - usr_len; data = page_address(srv->chunks[buf_id]); - ret = ctx->ops.rdma_ev(srv, srv->priv, id, WRITE, data, data_len, - data + data_len, usr_len); - if (unlikely(ret)) { + ret = ctx->ops.rdma_ev(srv->priv, id, data, data_len, + data + data_len, usr_len); + if (ret) { rtrs_err_rl(s, "Processing write request failed, user module callback reports err: %d\n", ret); @@ -1075,20 +1119,21 @@ send_err_msg: rtrs_err_rl(s, "Processing write request failed, sending I/O response failed, msg_id %d, err: %d\n", buf_id, ret); - close_sess(sess); + close_path(srv_path); } - rtrs_srv_put_ops_ids(sess); + rtrs_srv_put_ops_ids(srv_path); } static void process_io_req(struct rtrs_srv_con *con, void *msg, u32 id, u32 off) { - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); struct rtrs_msg_rdma_hdr *hdr; unsigned int type; - ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, sess->dma_addr[id], + ib_dma_sync_single_for_cpu(srv_path->s.dev->ib_dev, + srv_path->dma_addr[id], max_chunk_size, DMA_BIDIRECTIONAL); hdr = msg; type = le16_to_cpu(hdr->type); @@ -1110,24 +1155,24 @@ static void process_io_req(struct rtrs_srv_con *con, void *msg, return; err: - close_sess(sess); + close_path(srv_path); } static void rtrs_srv_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) { struct rtrs_srv_mr *mr = container_of(wc->wr_cqe, typeof(*mr), inv_cqe); - struct rtrs_srv_con *con = cq->cq_context; - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); + struct rtrs_srv_sess *srv = srv_path->srv; u32 msg_id, off; void *data; - if (unlikely(wc->status != IB_WC_SUCCESS)) { + if (wc->status != IB_WC_SUCCESS) { rtrs_err(s, "Failed IB_WR_LOCAL_INV: %s\n", ib_wc_status_msg(wc->status)); - close_sess(sess); + close_path(srv_path); } msg_id = mr->msg_id; off = mr->msg_off; @@ -1174,20 +1219,20 @@ static void rtrs_rdma_process_wr_wait_list(struct rtrs_srv_con *con) static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) { - struct rtrs_srv_con *con = cq->cq_context; - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); + struct rtrs_srv_sess *srv = srv_path->srv; u32 imm_type, imm_payload; int err; - if (unlikely(wc->status != IB_WC_SUCCESS)) { + if (wc->status != IB_WC_SUCCESS) { if (wc->status != IB_WC_WR_FLUSH_ERR) { rtrs_err(s, "%s (wr_cqe: %p, type: %d, vendor_err: 0x%x, len: %u)\n", ib_wc_status_msg(wc->status), wc->wr_cqe, wc->opcode, wc->vendor_err, wc->byte_len); - close_sess(sess); + close_path(srv_path); } return; } @@ -1200,37 +1245,37 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) */ if (WARN_ON(wc->wr_cqe != &io_comp_cqe)) return; + srv_path->s.hb_missed_cnt = 0; err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); - if (unlikely(err)) { + if (err) { rtrs_err(s, "rtrs_post_recv(), err: %d\n", err); - close_sess(sess); + close_path(srv_path); break; } rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), &imm_type, &imm_payload); - if (likely(imm_type == RTRS_IO_REQ_IMM)) { + if (imm_type == RTRS_IO_REQ_IMM) { u32 msg_id, off; void *data; - msg_id = imm_payload >> sess->mem_bits; - off = imm_payload & ((1 << sess->mem_bits) - 1); - if (unlikely(msg_id >= srv->queue_depth || - off >= max_chunk_size)) { + msg_id = imm_payload >> srv_path->mem_bits; + off = imm_payload & ((1 << srv_path->mem_bits) - 1); + if (msg_id >= srv->queue_depth || off >= max_chunk_size) { rtrs_err(s, "Wrong msg_id %u, off %u\n", msg_id, off); - close_sess(sess); + close_path(srv_path); return; } if (always_invalidate) { - struct rtrs_srv_mr *mr = &sess->mrs[msg_id]; + struct rtrs_srv_mr *mr = &srv_path->mrs[msg_id]; mr->msg_off = off; mr->msg_id = msg_id; err = rtrs_srv_inv_rkey(con, mr); - if (unlikely(err)) { + if (err) { rtrs_err(s, "rtrs_post_recv(), err: %d\n", err); - close_sess(sess); + close_path(srv_path); break; } } else { @@ -1239,10 +1284,10 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) } } else if (imm_type == RTRS_HB_MSG_IMM) { WARN_ON(con->c.cid); - rtrs_send_hb_ack(&sess->s); + rtrs_send_hb_ack(&srv_path->s); } else if (imm_type == RTRS_HB_ACK_IMM) { WARN_ON(con->c.cid); - sess->s.hb_missed_cnt = 0; + srv_path->s.hb_missed_cnt = 0; } else { rtrs_wrn(s, "Unknown IMM type %u\n", imm_type); } @@ -1251,11 +1296,11 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) case IB_WC_SEND: /* * post_send() RDMA write completions of IO reqs (read/write) - * and hb + * and hb. */ - atomic_add(srv->queue_depth, &con->sq_wr_avail); + atomic_add(s->signal_interval, &con->c.sq_wr_avail); - if (unlikely(!list_empty_careful(&con->rsp_wr_wait_list))) + if (!list_empty_careful(&con->rsp_wr_wait_list)) rtrs_rdma_process_wr_wait_list(con); break; @@ -1266,22 +1311,23 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) } /** - * rtrs_srv_get_sess_name() - Get rtrs_srv peer hostname. + * rtrs_srv_get_path_name() - Get rtrs_srv peer hostname. * @srv: Session - * @sessname: Sessname buffer + * @pathname: Pathname buffer * @len: Length of sessname buffer */ -int rtrs_srv_get_sess_name(struct rtrs_srv *srv, char *sessname, size_t len) +int rtrs_srv_get_path_name(struct rtrs_srv_sess *srv, char *pathname, + size_t len) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; int err = -ENOTCONN; mutex_lock(&srv->paths_mutex); - list_for_each_entry(sess, &srv->paths_list, s.entry) { - if (sess->state != RTRS_SRV_CONNECTED) + list_for_each_entry(srv_path, &srv->paths_list, s.entry) { + if (srv_path->state != RTRS_SRV_CONNECTED) continue; - strlcpy(sessname, sess->s.sessname, - min_t(size_t, sizeof(sess->s.sessname), len)); + strscpy(pathname, srv_path->s.sessname, + min_t(size_t, sizeof(srv_path->s.sessname), len)); err = 0; break; } @@ -1289,53 +1335,96 @@ int rtrs_srv_get_sess_name(struct rtrs_srv *srv, char *sessname, size_t len) return err; } -EXPORT_SYMBOL(rtrs_srv_get_sess_name); +EXPORT_SYMBOL(rtrs_srv_get_path_name); /** - * rtrs_srv_get_sess_qdepth() - Get rtrs_srv qdepth. + * rtrs_srv_get_queue_depth() - Get rtrs_srv qdepth. * @srv: Session */ -int rtrs_srv_get_queue_depth(struct rtrs_srv *srv) +int rtrs_srv_get_queue_depth(struct rtrs_srv_sess *srv) { return srv->queue_depth; } EXPORT_SYMBOL(rtrs_srv_get_queue_depth); -static int find_next_bit_ring(struct rtrs_srv_sess *sess) +static int find_next_bit_ring(struct rtrs_srv_path *srv_path) { - struct ib_device *ib_dev = sess->s.dev->ib_dev; + struct ib_device *ib_dev = srv_path->s.dev->ib_dev; int v; - v = cpumask_next(sess->cur_cq_vector, &cq_affinity_mask); + v = cpumask_next(srv_path->cur_cq_vector, &cq_affinity_mask); if (v >= nr_cpu_ids || v >= ib_dev->num_comp_vectors) v = cpumask_first(&cq_affinity_mask); return v; } -static int rtrs_srv_get_next_cq_vector(struct rtrs_srv_sess *sess) +static int rtrs_srv_get_next_cq_vector(struct rtrs_srv_path *srv_path) { - sess->cur_cq_vector = find_next_bit_ring(sess); + srv_path->cur_cq_vector = find_next_bit_ring(srv_path); - return sess->cur_cq_vector; + return srv_path->cur_cq_vector; } -static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) +static void rtrs_srv_dev_release(struct device *dev) +{ + struct rtrs_srv_sess *srv = container_of(dev, struct rtrs_srv_sess, + dev); + + kfree(srv); +} + +static void free_srv(struct rtrs_srv_sess *srv) { - struct rtrs_srv *srv; int i; + WARN_ON(refcount_read(&srv->refcount)); + for (i = 0; i < srv->queue_depth; i++) + __free_pages(srv->chunks[i], get_order(max_chunk_size)); + kfree(srv->chunks); + mutex_destroy(&srv->paths_mutex); + mutex_destroy(&srv->paths_ev_mutex); + /* last put to release the srv structure */ + put_device(&srv->dev); +} + +static struct rtrs_srv_sess *get_or_create_srv(struct rtrs_srv_ctx *ctx, + const uuid_t *paths_uuid, + bool first_conn) +{ + struct rtrs_srv_sess *srv; + int i; + + mutex_lock(&ctx->srv_mutex); + list_for_each_entry(srv, &ctx->srv_list, ctx_list) { + if (uuid_equal(&srv->paths_uuid, paths_uuid) && + refcount_inc_not_zero(&srv->refcount)) { + mutex_unlock(&ctx->srv_mutex); + return srv; + } + } + mutex_unlock(&ctx->srv_mutex); + /* + * If this request is not the first connection request from the + * client for this session then fail and return error. + */ + if (!first_conn) { + pr_err_ratelimited("Error: Not the first connection request for this session\n"); + return ERR_PTR(-ENXIO); + } + + /* need to allocate a new srv */ srv = kzalloc(sizeof(*srv), GFP_KERNEL); if (!srv) - return NULL; + return ERR_PTR(-ENOMEM); - refcount_set(&srv->refcount, 1); INIT_LIST_HEAD(&srv->paths_list); mutex_init(&srv->paths_mutex); mutex_init(&srv->paths_ev_mutex); uuid_copy(&srv->paths_uuid, paths_uuid); srv->queue_depth = sess_queue_depth; srv->ctx = ctx; + device_initialize(&srv->dev); + srv->dev.release = rtrs_srv_dev_release; srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks), GFP_KERNEL); @@ -1343,68 +1432,29 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, goto err_free_srv; for (i = 0; i < srv->queue_depth; i++) { - srv->chunks[i] = mempool_alloc(chunk_pool, GFP_KERNEL); + srv->chunks[i] = alloc_pages(GFP_KERNEL, + get_order(max_chunk_size)); if (!srv->chunks[i]) goto err_free_chunks; } + refcount_set(&srv->refcount, 1); + mutex_lock(&ctx->srv_mutex); list_add(&srv->ctx_list, &ctx->srv_list); + mutex_unlock(&ctx->srv_mutex); return srv; err_free_chunks: while (i--) - mempool_free(srv->chunks[i], chunk_pool); + __free_pages(srv->chunks[i], get_order(max_chunk_size)); kfree(srv->chunks); err_free_srv: - kfree(srv); - - return NULL; -} - -static void free_srv(struct rtrs_srv *srv) -{ - int i; - - WARN_ON(refcount_read(&srv->refcount)); - for (i = 0; i < srv->queue_depth; i++) - mempool_free(srv->chunks[i], chunk_pool); - kfree(srv->chunks); - mutex_destroy(&srv->paths_mutex); - mutex_destroy(&srv->paths_ev_mutex); - /* last put to release the srv structure */ put_device(&srv->dev); + return ERR_PTR(-ENOMEM); } -static inline struct rtrs_srv *__find_srv_and_get(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) -{ - struct rtrs_srv *srv; - - list_for_each_entry(srv, &ctx->srv_list, ctx_list) { - if (uuid_equal(&srv->paths_uuid, paths_uuid) && - refcount_inc_not_zero(&srv->refcount)) - return srv; - } - - return NULL; -} - -static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) -{ - struct rtrs_srv *srv; - - mutex_lock(&ctx->srv_mutex); - srv = __find_srv_and_get(ctx, paths_uuid); - if (!srv) - srv = __alloc_srv(ctx, paths_uuid); - mutex_unlock(&ctx->srv_mutex); - - return srv; -} - -static void put_srv(struct rtrs_srv *srv) +static void put_srv(struct rtrs_srv_sess *srv) { if (refcount_dec_and_test(&srv->refcount)) { struct rtrs_srv_ctx *ctx = srv->ctx; @@ -1418,23 +1468,23 @@ static void put_srv(struct rtrs_srv *srv) } } -static void __add_path_to_srv(struct rtrs_srv *srv, - struct rtrs_srv_sess *sess) +static void __add_path_to_srv(struct rtrs_srv_sess *srv, + struct rtrs_srv_path *srv_path) { - list_add_tail(&sess->s.entry, &srv->paths_list); + list_add_tail(&srv_path->s.entry, &srv->paths_list); srv->paths_num++; WARN_ON(srv->paths_num >= MAX_PATHS_NUM); } -static void del_path_from_srv(struct rtrs_srv_sess *sess) +static void del_path_from_srv(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_sess *srv = srv_path->srv; if (WARN_ON(!srv)) return; mutex_lock(&srv->paths_mutex); - list_del(&sess->s.entry); + list_del(&srv_path->s.entry); WARN_ON(!srv->paths_num); srv->paths_num--; mutex_unlock(&srv->paths_mutex); @@ -1464,80 +1514,92 @@ static int sockaddr_cmp(const struct sockaddr *a, const struct sockaddr *b) } } -static bool __is_path_w_addr_exists(struct rtrs_srv *srv, +static bool __is_path_w_addr_exists(struct rtrs_srv_sess *srv, struct rdma_addr *addr) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; - list_for_each_entry(sess, &srv->paths_list, s.entry) - if (!sockaddr_cmp((struct sockaddr *)&sess->s.dst_addr, + list_for_each_entry(srv_path, &srv->paths_list, s.entry) + if (!sockaddr_cmp((struct sockaddr *)&srv_path->s.dst_addr, (struct sockaddr *)&addr->dst_addr) && - !sockaddr_cmp((struct sockaddr *)&sess->s.src_addr, + !sockaddr_cmp((struct sockaddr *)&srv_path->s.src_addr, (struct sockaddr *)&addr->src_addr)) return true; return false; } -static void free_sess(struct rtrs_srv_sess *sess) +static void free_path(struct rtrs_srv_path *srv_path) { - if (sess->kobj.state_in_sysfs) - kobject_put(&sess->kobj); - else - kfree(sess); + if (srv_path->kobj.state_in_sysfs) { + kobject_del(&srv_path->kobj); + kobject_put(&srv_path->kobj); + } else { + free_percpu(srv_path->stats->rdma_stats); + kfree(srv_path->stats); + kfree(srv_path); + } } static void rtrs_srv_close_work(struct work_struct *work) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; struct rtrs_srv_con *con; int i; - sess = container_of(work, typeof(*sess), close_work); + srv_path = container_of(work, typeof(*srv_path), close_work); - rtrs_srv_destroy_sess_files(sess); - rtrs_srv_stop_hb(sess); + rtrs_srv_stop_hb(srv_path); - for (i = 0; i < sess->s.con_num; i++) { - if (!sess->s.con[i]) + for (i = 0; i < srv_path->s.con_num; i++) { + if (!srv_path->s.con[i]) continue; - con = to_srv_con(sess->s.con[i]); + con = to_srv_con(srv_path->s.con[i]); rdma_disconnect(con->c.cm_id); ib_drain_qp(con->c.qp); } - /* Wait for all inflights */ - rtrs_srv_wait_ops_ids(sess); + + /* + * Degrade ref count to the usual model with a single shared + * atomic_t counter + */ + percpu_ref_kill(&srv_path->ids_inflight_ref); + + /* Wait for all completion */ + wait_for_completion(&srv_path->complete_done); + + rtrs_srv_destroy_path_files(srv_path); /* Notify upper layer if we are the last path */ - rtrs_srv_sess_down(sess); + rtrs_srv_path_down(srv_path); - unmap_cont_bufs(sess); - rtrs_srv_free_ops_ids(sess); + unmap_cont_bufs(srv_path); + rtrs_srv_free_ops_ids(srv_path); - for (i = 0; i < sess->s.con_num; i++) { - if (!sess->s.con[i]) + for (i = 0; i < srv_path->s.con_num; i++) { + if (!srv_path->s.con[i]) continue; - con = to_srv_con(sess->s.con[i]); + con = to_srv_con(srv_path->s.con[i]); rtrs_cq_qp_destroy(&con->c); rdma_destroy_id(con->c.cm_id); kfree(con); } - rtrs_ib_dev_put(sess->s.dev); + rtrs_ib_dev_put(srv_path->s.dev); - del_path_from_srv(sess); - put_srv(sess->srv); - sess->srv = NULL; - rtrs_srv_change_state(sess, RTRS_SRV_CLOSED); + del_path_from_srv(srv_path); + put_srv(srv_path->srv); + srv_path->srv = NULL; + rtrs_srv_change_state(srv_path, RTRS_SRV_CLOSED); - kfree(sess->dma_addr); - kfree(sess->s.con); - free_sess(sess); + kfree(srv_path->dma_addr); + kfree(srv_path->s.con); + free_path(srv_path); } -static int rtrs_rdma_do_accept(struct rtrs_srv_sess *sess, +static int rtrs_rdma_do_accept(struct rtrs_srv_path *srv_path, struct rdma_cm_id *cm_id) { - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_msg_conn_rsp msg; struct rdma_conn_param param; int err; @@ -1585,28 +1647,28 @@ static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno) return errno; } -static struct rtrs_srv_sess * -__find_sess(struct rtrs_srv *srv, const uuid_t *sess_uuid) +static struct rtrs_srv_path * +__find_path(struct rtrs_srv_sess *srv, const uuid_t *sess_uuid) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; - list_for_each_entry(sess, &srv->paths_list, s.entry) { - if (uuid_equal(&sess->s.uuid, sess_uuid)) - return sess; + list_for_each_entry(srv_path, &srv->paths_list, s.entry) { + if (uuid_equal(&srv_path->s.uuid, sess_uuid)) + return srv_path; } return NULL; } -static int create_con(struct rtrs_srv_sess *sess, +static int create_con(struct rtrs_srv_path *srv_path, struct rdma_cm_id *cm_id, unsigned int cid) { - struct rtrs_srv *srv = sess->srv; - struct rtrs_sess *s = &sess->s; + struct rtrs_srv_sess *srv = srv_path->srv; + struct rtrs_path *s = &srv_path->s; struct rtrs_srv_con *con; - u16 cq_size, wr_queue_size; + u32 cq_num, max_send_wr, max_recv_wr, wr_limit; int err, cq_vector; con = kzalloc(sizeof(*con), GFP_KERNEL); @@ -1618,39 +1680,42 @@ static int create_con(struct rtrs_srv_sess *sess, spin_lock_init(&con->rsp_wr_wait_lock); INIT_LIST_HEAD(&con->rsp_wr_wait_list); con->c.cm_id = cm_id; - con->c.sess = &sess->s; + con->c.path = &srv_path->s; con->c.cid = cid; - atomic_set(&con->wr_cnt, 0); + atomic_set(&con->c.wr_cnt, 1); + wr_limit = srv_path->s.dev->ib_dev->attrs.max_qp_wr; if (con->c.cid == 0) { /* * All receive and all send (each requiring invalidate) * + 2 for drain and heartbeat */ - wr_queue_size = SERVICE_CON_QUEUE_DEPTH * 3 + 2; - cq_size = wr_queue_size; + max_send_wr = min_t(int, wr_limit, + SERVICE_CON_QUEUE_DEPTH * 2 + 2); + max_recv_wr = max_send_wr; + s->signal_interval = min_not_zero(srv->queue_depth, + (size_t)SERVICE_CON_QUEUE_DEPTH); } else { - /* - * If we have all receive requests posted and - * all write requests posted and each read request - * requires an invalidate request + drain - * and qp gets into error state. - */ - cq_size = srv->queue_depth * 3 + 1; - /* - * In theory we might have queue_depth * 32 - * outstanding requests if an unsafe global key is used - * and we have queue_depth read requests each consisting - * of 32 different addresses. div 3 for mlx5. - */ - wr_queue_size = sess->s.dev->ib_dev->attrs.max_qp_wr / 3; + /* when always_invlaidate enalbed, we need linv+rinv+mr+imm */ + if (always_invalidate) + max_send_wr = + min_t(int, wr_limit, + srv->queue_depth * (1 + 4) + 1); + else + max_send_wr = + min_t(int, wr_limit, + srv->queue_depth * (1 + 2) + 1); + + max_recv_wr = srv->queue_depth + 1; } - atomic_set(&con->sq_wr_avail, wr_queue_size); - cq_vector = rtrs_srv_get_next_cq_vector(sess); + cq_num = max_send_wr + max_recv_wr; + atomic_set(&con->c.sq_wr_avail, max_send_wr); + cq_vector = rtrs_srv_get_next_cq_vector(srv_path); /* TODO: SOFTIRQ can be faster, but be careful with softirq context */ - err = rtrs_cq_qp_create(&sess->s, &con->c, 1, cq_vector, cq_size, - wr_queue_size, IB_POLL_WORKQUEUE); + err = rtrs_cq_qp_create(&srv_path->s, &con->c, 1, cq_vector, cq_num, + max_send_wr, max_recv_wr, + IB_POLL_WORKQUEUE); if (err) { rtrs_err(s, "rtrs_cq_qp_create(), err: %d\n", err); goto free_con; @@ -1660,8 +1725,8 @@ static int create_con(struct rtrs_srv_sess *sess, if (err) goto free_cqqp; } - WARN_ON(sess->s.con[cid]); - sess->s.con[cid] = &con->c; + WARN_ON(srv_path->s.con[cid]); + srv_path->s.con[cid] = &con->c; /* * Change context from server to current connection. The other @@ -1680,14 +1745,16 @@ err: return err; } -static struct rtrs_srv_sess *__alloc_sess(struct rtrs_srv *srv, +static struct rtrs_srv_path *__alloc_path(struct rtrs_srv_sess *srv, struct rdma_cm_id *cm_id, unsigned int con_num, unsigned int recon_cnt, const uuid_t *uuid) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; int err = -ENOMEM; + char str[NAME_MAX]; + struct rtrs_addr path; if (srv->paths_num >= MAX_PATHS_NUM) { err = -ECONNRESET; @@ -1698,66 +1765,82 @@ static struct rtrs_srv_sess *__alloc_sess(struct rtrs_srv *srv, pr_err("Path with same addr exists\n"); goto err; } - sess = kzalloc(sizeof(*sess), GFP_KERNEL); - if (!sess) + srv_path = kzalloc(sizeof(*srv_path), GFP_KERNEL); + if (!srv_path) goto err; - sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL); - if (!sess->stats) + srv_path->stats = kzalloc(sizeof(*srv_path->stats), GFP_KERNEL); + if (!srv_path->stats) goto err_free_sess; - sess->stats->sess = sess; - - sess->dma_addr = kcalloc(srv->queue_depth, sizeof(*sess->dma_addr), - GFP_KERNEL); - if (!sess->dma_addr) + srv_path->stats->rdma_stats = alloc_percpu(struct rtrs_srv_stats_rdma_stats); + if (!srv_path->stats->rdma_stats) goto err_free_stats; - sess->s.con = kcalloc(con_num, sizeof(*sess->s.con), GFP_KERNEL); - if (!sess->s.con) + srv_path->stats->srv_path = srv_path; + + srv_path->dma_addr = kcalloc(srv->queue_depth, + sizeof(*srv_path->dma_addr), + GFP_KERNEL); + if (!srv_path->dma_addr) + goto err_free_percpu; + + srv_path->s.con = kcalloc(con_num, sizeof(*srv_path->s.con), + GFP_KERNEL); + if (!srv_path->s.con) goto err_free_dma_addr; - sess->state = RTRS_SRV_CONNECTING; - sess->srv = srv; - sess->cur_cq_vector = -1; - sess->s.dst_addr = cm_id->route.addr.dst_addr; - sess->s.src_addr = cm_id->route.addr.src_addr; - sess->s.con_num = con_num; - sess->s.recon_cnt = recon_cnt; - uuid_copy(&sess->s.uuid, uuid); - spin_lock_init(&sess->state_lock); - INIT_WORK(&sess->close_work, rtrs_srv_close_work); - rtrs_srv_init_hb(sess); - - sess->s.dev = rtrs_ib_dev_find_or_add(cm_id->device, &dev_pd); - if (!sess->s.dev) { + srv_path->state = RTRS_SRV_CONNECTING; + srv_path->srv = srv; + srv_path->cur_cq_vector = -1; + srv_path->s.dst_addr = cm_id->route.addr.dst_addr; + srv_path->s.src_addr = cm_id->route.addr.src_addr; + + /* temporary until receiving session-name from client */ + path.src = &srv_path->s.src_addr; + path.dst = &srv_path->s.dst_addr; + rtrs_addr_to_str(&path, str, sizeof(str)); + strscpy(srv_path->s.sessname, str, sizeof(srv_path->s.sessname)); + + srv_path->s.con_num = con_num; + srv_path->s.irq_con_num = con_num; + srv_path->s.recon_cnt = recon_cnt; + uuid_copy(&srv_path->s.uuid, uuid); + spin_lock_init(&srv_path->state_lock); + INIT_WORK(&srv_path->close_work, rtrs_srv_close_work); + rtrs_srv_init_hb(srv_path); + + srv_path->s.dev = rtrs_ib_dev_find_or_add(cm_id->device, &dev_pd); + if (!srv_path->s.dev) { err = -ENOMEM; goto err_free_con; } - err = map_cont_bufs(sess); + err = map_cont_bufs(srv_path); if (err) goto err_put_dev; - err = rtrs_srv_alloc_ops_ids(sess); + err = rtrs_srv_alloc_ops_ids(srv_path); if (err) goto err_unmap_bufs; - __add_path_to_srv(srv, sess); + __add_path_to_srv(srv, srv_path); - return sess; + return srv_path; err_unmap_bufs: - unmap_cont_bufs(sess); + unmap_cont_bufs(srv_path); err_put_dev: - rtrs_ib_dev_put(sess->s.dev); + rtrs_ib_dev_put(srv_path->s.dev); err_free_con: - kfree(sess->s.con); + kfree(srv_path->s.con); err_free_dma_addr: - kfree(sess->dma_addr); + kfree(srv_path->dma_addr); +err_free_percpu: + free_percpu(srv_path->stats->rdma_stats); err_free_stats: - kfree(sess->stats); + kfree(srv_path->stats); err_free_sess: - kfree(sess); + kfree(srv_path); err: return ERR_PTR(err); } @@ -1767,58 +1850,59 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, size_t len) { struct rtrs_srv_ctx *ctx = cm_id->context; - struct rtrs_srv_sess *sess; - struct rtrs_srv *srv; + struct rtrs_srv_path *srv_path; + struct rtrs_srv_sess *srv; u16 version, con_num, cid; u16 recon_cnt; - int err; + int err = -ECONNRESET; if (len < sizeof(*msg)) { pr_err("Invalid RTRS connection request\n"); - goto reject_w_econnreset; + goto reject_w_err; } if (le16_to_cpu(msg->magic) != RTRS_MAGIC) { pr_err("Invalid RTRS magic\n"); - goto reject_w_econnreset; + goto reject_w_err; } version = le16_to_cpu(msg->version); if (version >> 8 != RTRS_PROTO_VER_MAJOR) { pr_err("Unsupported major RTRS version: %d, expected %d\n", version >> 8, RTRS_PROTO_VER_MAJOR); - goto reject_w_econnreset; + goto reject_w_err; } con_num = le16_to_cpu(msg->cid_num); if (con_num > 4096) { /* Sanity check */ pr_err("Too many connections requested: %d\n", con_num); - goto reject_w_econnreset; + goto reject_w_err; } cid = le16_to_cpu(msg->cid); if (cid >= con_num) { /* Sanity check */ pr_err("Incorrect cid: %d >= %d\n", cid, con_num); - goto reject_w_econnreset; + goto reject_w_err; } recon_cnt = le16_to_cpu(msg->recon_cnt); - srv = get_or_create_srv(ctx, &msg->paths_uuid); - if (!srv) { - err = -ENOMEM; + srv = get_or_create_srv(ctx, &msg->paths_uuid, msg->first_conn); + if (IS_ERR(srv)) { + err = PTR_ERR(srv); + pr_err("get_or_create_srv(), error %d\n", err); goto reject_w_err; } mutex_lock(&srv->paths_mutex); - sess = __find_sess(srv, &msg->sess_uuid); - if (sess) { - struct rtrs_sess *s = &sess->s; + srv_path = __find_path(srv, &msg->sess_uuid); + if (srv_path) { + struct rtrs_path *s = &srv_path->s; /* Session already holds a reference */ put_srv(srv); - if (sess->state != RTRS_SRV_CONNECTING) { + if (srv_path->state != RTRS_SRV_CONNECTING) { rtrs_err(s, "Session in wrong state: %s\n", - rtrs_srv_state_str(sess->state)); + rtrs_srv_state_str(srv_path->state)); mutex_unlock(&srv->paths_mutex); - goto reject_w_econnreset; + goto reject_w_err; } /* * Sanity checks @@ -1827,27 +1911,29 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, rtrs_err(s, "Incorrect request: %d, %d\n", cid, con_num); mutex_unlock(&srv->paths_mutex); - goto reject_w_econnreset; + goto reject_w_err; } if (s->con[cid]) { rtrs_err(s, "Connection already exists: %d\n", cid); mutex_unlock(&srv->paths_mutex); - goto reject_w_econnreset; + goto reject_w_err; } } else { - sess = __alloc_sess(srv, cm_id, con_num, recon_cnt, + srv_path = __alloc_path(srv, cm_id, con_num, recon_cnt, &msg->sess_uuid); - if (IS_ERR(sess)) { + if (IS_ERR(srv_path)) { mutex_unlock(&srv->paths_mutex); put_srv(srv); - err = PTR_ERR(sess); + err = PTR_ERR(srv_path); + pr_err("RTRS server session allocation failed: %d\n", err); goto reject_w_err; } } - err = create_con(sess, cm_id, cid); + err = create_con(srv_path, cm_id, cid); if (err) { - (void)rtrs_rdma_do_reject(cm_id, err); + rtrs_err((&srv_path->s), "create_con(), error %d\n", err); + rtrs_rdma_do_reject(cm_id, err); /* * Since session has other connections we follow normal way * through workqueue, but still return an error to tell cma.c @@ -1855,9 +1941,10 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, */ goto close_and_return_err; } - err = rtrs_rdma_do_accept(sess, cm_id); + err = rtrs_rdma_do_accept(srv_path, cm_id); if (err) { - (void)rtrs_rdma_do_reject(cm_id, err); + rtrs_err((&srv_path->s), "rtrs_rdma_do_accept(), error %d\n", err); + rtrs_rdma_do_reject(cm_id, err); /* * Since current connection was successfully added to the * session we follow normal way through workqueue to close the @@ -1874,12 +1961,9 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, reject_w_err: return rtrs_rdma_do_reject(cm_id, err); -reject_w_econnreset: - return rtrs_rdma_do_reject(cm_id, -ECONNRESET); - close_and_return_err: - close_sess(sess); mutex_unlock(&srv->paths_mutex); + close_path(srv_path); return err; } @@ -1887,24 +1971,23 @@ close_and_return_err: static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *ev) { - struct rtrs_srv_sess *sess = NULL; - struct rtrs_sess *s = NULL; + struct rtrs_srv_path *srv_path = NULL; + struct rtrs_path *s = NULL; + struct rtrs_con *c = NULL; - if (ev->event != RDMA_CM_EVENT_CONNECT_REQUEST) { - struct rtrs_con *c = cm_id->context; - - s = c->sess; - sess = to_srv_sess(s); - } - - switch (ev->event) { - case RDMA_CM_EVENT_CONNECT_REQUEST: + if (ev->event == RDMA_CM_EVENT_CONNECT_REQUEST) /* * In case of error cma.c will destroy cm_id, * see cma_process_remove() */ return rtrs_rdma_connect(cm_id, ev->param.conn.private_data, ev->param.conn.private_data_len); + + c = cm_id->context; + s = c->path; + srv_path = to_srv_path(s); + + switch (ev->event) { case RDMA_CM_EVENT_ESTABLISHED: /* Nothing here */ break; @@ -1913,15 +1996,12 @@ static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_UNREACHABLE: rtrs_err(s, "CM error (CM event: %s, err: %d)\n", rdma_event_msg(ev->event), ev->status); - close_sess(sess); - break; + fallthrough; case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_ADDR_CHANGE: case RDMA_CM_EVENT_TIMEWAIT_EXIT: - close_sess(sess); - break; case RDMA_CM_EVENT_DEVICE_REMOVAL: - close_sess(sess); + close_path(srv_path); break; default: pr_err("Ignoring unexpected CM event %s, err %d\n", @@ -2033,6 +2113,70 @@ static void free_srv_ctx(struct rtrs_srv_ctx *ctx) kfree(ctx); } +static int rtrs_srv_add_one(struct ib_device *device) +{ + struct rtrs_srv_ctx *ctx; + int ret = 0; + + mutex_lock(&ib_ctx.ib_dev_mutex); + if (ib_ctx.ib_dev_count) + goto out; + + /* + * Since our CM IDs are NOT bound to any ib device we will create them + * only once + */ + ctx = ib_ctx.srv_ctx; + ret = rtrs_srv_rdma_init(ctx, ib_ctx.port); + if (ret) { + /* + * We errored out here. + * According to the ib code, if we encounter an error here then the + * error code is ignored, and no more calls to our ops are made. + */ + pr_err("Failed to initialize RDMA connection"); + goto err_out; + } + +out: + /* + * Keep a track on the number of ib devices added + */ + ib_ctx.ib_dev_count++; + +err_out: + mutex_unlock(&ib_ctx.ib_dev_mutex); + return ret; +} + +static void rtrs_srv_remove_one(struct ib_device *device, void *client_data) +{ + struct rtrs_srv_ctx *ctx; + + mutex_lock(&ib_ctx.ib_dev_mutex); + ib_ctx.ib_dev_count--; + + if (ib_ctx.ib_dev_count) + goto out; + + /* + * Since our CM IDs are NOT bound to any ib device we will remove them + * only once, when the last device is removed + */ + ctx = ib_ctx.srv_ctx; + rdma_destroy_id(ctx->cm_id_ip); + rdma_destroy_id(ctx->cm_id_ib); + +out: + mutex_unlock(&ib_ctx.ib_dev_mutex); +} + +static struct ib_client rtrs_srv_client = { + .name = "rtrs_server", + .add = rtrs_srv_add_one, + .remove = rtrs_srv_remove_one +}; + /** * rtrs_srv_open() - open RTRS server context * @ops: callback functions @@ -2051,7 +2195,11 @@ struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port) if (!ctx) return ERR_PTR(-ENOMEM); - err = rtrs_srv_rdma_init(ctx, port); + mutex_init(&ib_ctx.ib_dev_mutex); + ib_ctx.srv_ctx = ctx; + ib_ctx.port = port; + + err = ib_register_client(&rtrs_srv_client); if (err) { free_srv_ctx(ctx); return ERR_PTR(err); @@ -2061,23 +2209,23 @@ struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port) } EXPORT_SYMBOL(rtrs_srv_open); -static void close_sessions(struct rtrs_srv *srv) +static void close_paths(struct rtrs_srv_sess *srv) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; mutex_lock(&srv->paths_mutex); - list_for_each_entry(sess, &srv->paths_list, s.entry) - close_sess(sess); + list_for_each_entry(srv_path, &srv->paths_list, s.entry) + close_path(srv_path); mutex_unlock(&srv->paths_mutex); } static void close_ctx(struct rtrs_srv_ctx *ctx) { - struct rtrs_srv *srv; + struct rtrs_srv_sess *srv; mutex_lock(&ctx->srv_mutex); list_for_each_entry(srv, &ctx->srv_list, ctx_list) - close_sessions(srv); + close_paths(srv); mutex_unlock(&ctx->srv_mutex); flush_workqueue(rtrs_wq); } @@ -2090,8 +2238,8 @@ static void close_ctx(struct rtrs_srv_ctx *ctx) */ void rtrs_srv_close(struct rtrs_srv_ctx *ctx) { - rdma_destroy_id(ctx->cm_id_ip); - rdma_destroy_id(ctx->cm_id_ib); + ib_unregister_client(&rtrs_srv_client); + mutex_destroy(&ib_ctx.ib_dev_mutex); close_ctx(ctx); free_srv_ctx(ctx); } @@ -2104,9 +2252,9 @@ static int check_module_params(void) sess_queue_depth, 1, MAX_SESS_QUEUE_DEPTH); return -EINVAL; } - if (max_chunk_size < 4096 || !is_power_of_2(max_chunk_size)) { + if (max_chunk_size < MIN_CHUNK_SIZE || !is_power_of_2(max_chunk_size)) { pr_err("Invalid max_chunk_size value %d, has to be >= %d and should be power of two.\n", - max_chunk_size, 4096); + max_chunk_size, MIN_CHUNK_SIZE); return -EINVAL; } @@ -2124,6 +2272,34 @@ static int check_module_params(void) return 0; } +void rtrs_srv_ib_event_handler(struct ib_event_handler *handler, + struct ib_event *ibevent) +{ + pr_info("Handling event: %s (%d).\n", ib_event_msg(ibevent->event), + ibevent->event); +} + +static int rtrs_srv_ib_dev_init(struct rtrs_ib_dev *dev) +{ + INIT_IB_EVENT_HANDLER(&dev->event_handler, dev->ib_dev, + rtrs_srv_ib_event_handler); + ib_register_event_handler(&dev->event_handler); + + return 0; +} + +static void rtrs_srv_ib_dev_deinit(struct rtrs_ib_dev *dev) +{ + ib_unregister_event_handler(&dev->event_handler); +} + + +static const struct rtrs_rdma_dev_pd_ops dev_pd_ops = { + .init = rtrs_srv_ib_dev_init, + .deinit = rtrs_srv_ib_dev_deinit +}; + + static int __init rtrs_server_init(void) { int err; @@ -2141,16 +2317,11 @@ static int __init rtrs_server_init(void) err); return err; } - chunk_pool = mempool_create_page_pool(sess_queue_depth * CHUNK_POOL_SZ, - get_order(max_chunk_size)); - if (!chunk_pool) - return -ENOMEM; - rtrs_dev_class = class_create(THIS_MODULE, "rtrs-server"); - if (IS_ERR(rtrs_dev_class)) { - err = PTR_ERR(rtrs_dev_class); - goto out_chunk_pool; - } - rtrs_wq = alloc_workqueue("rtrs_server_wq", WQ_MEM_RECLAIM, 0); + err = class_register(&rtrs_dev_class); + if (err) + goto out_err; + + rtrs_wq = alloc_workqueue("rtrs_server_wq", 0, 0); if (!rtrs_wq) { err = -ENOMEM; goto out_dev_class; @@ -2159,18 +2330,15 @@ static int __init rtrs_server_init(void) return 0; out_dev_class: - class_destroy(rtrs_dev_class); -out_chunk_pool: - mempool_destroy(chunk_pool); - + class_unregister(&rtrs_dev_class); +out_err: return err; } static void __exit rtrs_server_exit(void) { destroy_workqueue(rtrs_wq); - class_destroy(rtrs_dev_class); - mempool_destroy(chunk_pool); + class_unregister(&rtrs_dev_class); rtrs_rdma_dev_pd_deinit(&dev_pd); } |
