summaryrefslogtreecommitdiff
path: root/net/sunrpc/xprtrdma
diff options
context:
space:
mode:
authorChuck Lever <chuck.lever@oracle.com>2024-02-04 18:17:47 -0500
committerChuck Lever <chuck.lever@oracle.com>2024-03-01 09:12:30 -0500
commite084ee673c77cade06ab4c2e36b5624c82608b8c (patch)
tree8d33291d88020354c7c06bdd4abd95f0e7c8b56f /net/sunrpc/xprtrdma
parentd2727cefff0204c3074a10e3ad2e1b5c9dfb986c (diff)
svcrdma: Add Write chunk WRs to the RPC's Send WR chain
Chain RDMA Writes that convey Write chunks onto the local Send chain. This means all WRs for an RPC Reply are now posted with a single ib_post_send() call, and there is a single Send completion when all of these are done. That reduces both the per-transport doorbell rate and completion rate. Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Diffstat (limited to 'net/sunrpc/xprtrdma')
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c86
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c5
2 files changed, 68 insertions, 23 deletions
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 40797114d50a..f2a100c4c81f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -231,6 +231,28 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
}
/**
+ * svc_rdma_write_chunk_release - Release Write chunk I/O resources
+ * @rdma: controlling transport
+ * @ctxt: Send context that is being released
+ */
+void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
+{
+ struct svc_rdma_write_info *info;
+ struct svc_rdma_chunk_ctxt *cc;
+
+ while (!list_empty(&ctxt->sc_write_info_list)) {
+ info = list_first_entry(&ctxt->sc_write_info_list,
+ struct svc_rdma_write_info, wi_list);
+ list_del(&info->wi_list);
+
+ cc = &info->wi_cc;
+ svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
+ svc_rdma_write_info_free(info);
+ }
+}
+
+/**
* svc_rdma_reply_chunk_release - Release Reply chunk I/O resources
* @rdma: controlling transport
* @ctxt: Send context that is being released
@@ -286,13 +308,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
- struct svc_rdma_write_info *info =
- container_of(cc, struct svc_rdma_write_info, wi_cc);
switch (wc->status) {
case IB_WC_SUCCESS:
trace_svcrdma_wc_write(&cc->cc_cid);
- break;
+ return;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
break;
@@ -300,12 +320,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_write_err(wc, &cc->cc_cid);
}
- svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
-
- if (unlikely(wc->status != IB_WC_SUCCESS))
- svc_xprt_deferred_close(&rdma->sc_xprt);
-
- svc_rdma_write_info_free(info);
+ /* The RDMA Write has flushed, so the client won't get
+ * some of the outgoing RPC message. Signal the loss
+ * to the client by closing the connection.
+ */
+ svc_xprt_deferred_close(&rdma->sc_xprt);
}
/**
@@ -601,13 +620,19 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data)
return xdr->len;
}
-static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_chunk *chunk,
- const struct xdr_buf *xdr)
+/* Link Write WRs for @chunk onto @sctxt's WR chain.
+ */
+static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct svc_rdma_chunk *chunk,
+ const struct xdr_buf *xdr)
{
struct svc_rdma_write_info *info;
struct svc_rdma_chunk_ctxt *cc;
+ struct ib_send_wr *first_wr;
struct xdr_buf payload;
+ struct list_head *pos;
+ struct ib_cqe *cqe;
int ret;
if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position,
@@ -623,10 +648,25 @@ static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
if (ret != payload.len)
goto out_err;
- trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
- ret = svc_rdma_post_chunk_ctxt(rdma, cc);
- if (ret < 0)
+ ret = -EINVAL;
+ if (unlikely(cc->cc_sqecount > rdma->sc_sq_depth))
goto out_err;
+
+ first_wr = sctxt->sc_wr_chain;
+ cqe = &cc->cc_cqe;
+ list_for_each(pos, &cc->cc_rwctxts) {
+ struct svc_rdma_rw_ctxt *rwc;
+
+ rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
+ first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
+ rdma->sc_port_num, cqe, first_wr);
+ cqe = NULL;
+ }
+ sctxt->sc_wr_chain = first_wr;
+ sctxt->sc_sqecount += cc->cc_sqecount;
+ list_add(&info->wi_list, &sctxt->sc_write_info_list);
+
+ trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
return 0;
out_err:
@@ -635,25 +675,27 @@ out_err:
}
/**
- * svc_rdma_send_write_list - Send all chunks on the Write list
+ * svc_rdma_prepare_write_list - Construct WR chain for sending Write list
* @rdma: controlling RDMA transport
- * @rctxt: Write list provisioned by the client
+ * @write_pcl: Write list provisioned by the client
+ * @sctxt: Send WR resources
* @xdr: xdr_buf containing an RPC Reply message
*
* Returns zero on success, or a negative errno if one or more
* Write chunks could not be sent.
*/
-int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
- const struct svc_rdma_recv_ctxt *rctxt,
- const struct xdr_buf *xdr)
+int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_pcl *write_pcl,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct xdr_buf *xdr)
{
struct svc_rdma_chunk *chunk;
int ret;
- pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) {
+ pcl_for_each_chunk(chunk, write_pcl) {
if (!chunk->ch_payload_length)
break;
- ret = svc_rdma_send_write_chunk(rdma, chunk, xdr);
+ ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr);
if (ret < 0)
return ret;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index bb5436b719e0..dfca39abd16c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -142,6 +142,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
ctxt->sc_cqe.done = svc_rdma_wc_send;
+ INIT_LIST_HEAD(&ctxt->sc_write_info_list);
ctxt->sc_xprt_buf = buffer;
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
rdma->sc_max_req_size);
@@ -227,6 +228,7 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
struct ib_device *device = rdma->sc_cm_id->device;
unsigned int i;
+ svc_rdma_write_chunk_release(rdma, ctxt);
svc_rdma_reply_chunk_release(rdma, ctxt);
if (ctxt->sc_page_count)
@@ -1013,7 +1015,8 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (!p)
goto put_ctxt;
- ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res);
+ ret = svc_rdma_prepare_write_list(rdma, &rctxt->rc_write_pcl, sctxt,
+ &rqstp->rq_res);
if (ret < 0)
goto put_ctxt;