summary | refs | log | tree | commit | diff
path: root/net/sunrpc/xprtrdma/svc_rdma_rw.c
diff options
context:
space:
mode:
author Chuck Lever <chuck.lever@oracle.com> 2018-05-07 15:27:43 -0400
committer J. Bruce Fields <bfields@redhat.com> 2018-05-11 15:48:57 -0400
commit 3316f0631139c87631f2652c118da1a0354bd40d (patch)
tree 553cb2cec530a38f17af633cb2f91e5a47f9bbfb /net/sunrpc/xprtrdma/svc_rdma_rw.c
parent 3a88092ee319b88cf30a2dc89b9edf2ef5518750 (diff)
svcrdma: Persistently allocate and DMA-map Receive buffers
The current Receive path uses an array of pages which are allocated and DMA mapped when each Receive WR is posted, and then handed off to the upper layer in rqstp::rq_arg. The page flip releases unused pages in the rq_pages pagelist. This mechanism introduces a significant amount of overhead.

So instead, kmalloc the Receive buffer, and leave it DMA-mapped while the transport remains connected. This confers a number of benefits:

* Each Receive WR requires only one receive SGE, no matter how large the inline threshold is. This helps the server-side NFS/RDMA transport operate on less capable RDMA devices.

* The Receive buffer is left allocated and mapped all the time. This relieves svc_rdma_post_recv from the overhead of allocating and DMA-mapping a fresh buffer.

* svc_rdma_wc_receive no longer has to DMA unmap the Receive buffer. It has to DMA sync only the number of bytes that were received.

* svc_rdma_build_arg_xdr no longer has to free a page in rq_pages for each page in the Receive buffer, making it a constant-time function.

* The Receive buffer is now plugged directly into the rq_arg's head[0].iov_vec, and can be larger than a page without spilling over into rq_arg's page list. This enables simplification of the RDMA Read path in subsequent patches.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Diffstat (limited to 'net/sunrpc/xprtrdma/svc_rdma_rw.c')
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_rw.c 32
1 files changed, 11 insertions, 21 deletions
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 8242aa318ac1..ce3ea8419704 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -718,15 +718,14 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
int ret;
- info->ri_pageno = head->rc_hdr_count;
- info->ri_pageoff = 0;
-
ret = svc_rdma_build_read_chunk(rqstp, info, p);
if (ret < 0)
goto out;
trace_svcrdma_encode_read(info->ri_chunklen, info->ri_position);
+ head->rc_hdr_count = 0;
+
/* Split the Receive buffer between the head and tail
* buffers at Read chunk's position. XDR roundup of the
* chunk is not included in either the pagelist or in
@@ -775,9 +774,6 @@ static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
int ret;
- info->ri_pageno = head->rc_hdr_count - 1;
- info->ri_pageoff = offset_in_page(head->rc_byte_len);
-
ret = svc_rdma_build_read_chunk(rqstp, info, p);
if (ret < 0)
goto out;
@@ -787,20 +783,13 @@ static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp,
head->rc_arg.len += info->ri_chunklen;
head->rc_arg.buflen += info->ri_chunklen;
- if (head->rc_arg.buflen <= head->rc_sges[0].length) {
- /* Transport header and RPC message fit entirely
- * in page where head iovec resides.
- */
- head->rc_arg.head[0].iov_len = info->ri_chunklen;
- } else {
- /* Transport header and part of RPC message reside
- * in the head iovec's page.
- */
- head->rc_arg.head[0].iov_len =
- head->rc_sges[0].length - head->rc_byte_len;
- head->rc_arg.page_len =
- info->ri_chunklen - head->rc_arg.head[0].iov_len;
- }
+ head->rc_hdr_count = 1;
+ head->rc_arg.head[0].iov_base = page_address(head->rc_pages[0]);
+ head->rc_arg.head[0].iov_len = min_t(size_t, PAGE_SIZE,
+ info->ri_chunklen);
+
+ head->rc_arg.page_len = info->ri_chunklen -
+ head->rc_arg.head[0].iov_len;
out:
return ret;
@@ -834,7 +823,6 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
* head->rc_arg. Pages involved with RDMA Read I/O are
* transferred there.
*/
- head->rc_page_count = head->rc_hdr_count;
head->rc_arg.head[0] = rqstp->rq_arg.head[0];
head->rc_arg.tail[0] = rqstp->rq_arg.tail[0];
head->rc_arg.pages = head->rc_pages;
@@ -847,6 +835,8 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
if (!info)
return -ENOMEM;
info->ri_readctxt = head;
+ info->ri_pageno = 0;
+ info->ri_pageoff = 0;
info->ri_position = be32_to_cpup(p + 1);
if (info->ri_position)