diff options
Diffstat (limited to 'net/sunrpc/xprtrdma')
-rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c | 36 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 17 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 244 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_rw.c | 57 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_sendto.c | 512 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_transport.c | 8 |
6 files changed, 496 insertions, 378 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 28020ec104d4..577513b7642e 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -275,32 +275,6 @@ out: return n; } -static inline int -encode_item_present(struct xdr_stream *xdr) -{ - __be32 *p; - - p = xdr_reserve_space(xdr, sizeof(*p)); - if (unlikely(!p)) - return -EMSGSIZE; - - *p = xdr_one; - return 0; -} - -static inline int -encode_item_not_present(struct xdr_stream *xdr) -{ - __be32 *p; - - p = xdr_reserve_space(xdr, sizeof(*p)); - if (unlikely(!p)) - return -EMSGSIZE; - - *p = xdr_zero; - return 0; -} - static void xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr) { @@ -414,7 +388,7 @@ static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, } while (nsegs); done: - return encode_item_not_present(xdr); + return xdr_stream_encode_item_absent(xdr); } /* Register and XDR encode the Write list. Supports encoding a list @@ -453,7 +427,7 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, if (nsegs < 0) return nsegs; - if (encode_item_present(xdr) < 0) + if (xdr_stream_encode_item_present(xdr) < 0) return -EMSGSIZE; segcount = xdr_reserve_space(xdr, sizeof(*segcount)); if (unlikely(!segcount)) @@ -480,7 +454,7 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, *segcount = cpu_to_be32(nchunks); done: - return encode_item_not_present(xdr); + return xdr_stream_encode_item_absent(xdr); } /* Register and XDR encode the Reply chunk. Supports encoding an array @@ -507,14 +481,14 @@ static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, __be32 *segcount; if (wtype != rpcrdma_replych) - return encode_item_not_present(xdr); + return xdr_stream_encode_item_absent(xdr); seg = req->rl_segments; nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg); if (nsegs < 0) return nsegs; - if (encode_item_present(xdr) < 0) + if (xdr_stream_encode_item_present(xdr) < 0) return -EMSGSIZE; segcount = xdr_reserve_space(xdr, sizeof(*segcount)); if (unlikely(!segcount)) diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 908e78bb87c6..d510a3a15d4b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -117,7 +117,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, { int ret; - ret = svc_rdma_map_reply_msg(rdma, ctxt, &rqst->rq_snd_buf, NULL); + ret = svc_rdma_map_reply_msg(rdma, ctxt, NULL, &rqst->rq_snd_buf); if (ret < 0) return -EIO; @@ -181,7 +181,9 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) if (!ctxt) goto drop_connection; - p = ctxt->sc_xprt_buf; + p = xdr_reserve_space(&ctxt->sc_stream, RPCRDMA_HDRLEN_MIN); + if (!p) + goto put_ctxt; *p++ = rqst->rq_xid; *p++ = rpcrdma_version; *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); @@ -189,7 +191,6 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) *p++ = xdr_zero; *p++ = xdr_zero; *p = xdr_zero; - svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_MIN); #ifdef SVCRDMA_BACKCHANNEL_DEBUG pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); @@ -197,12 +198,13 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) rqst->rq_xtime = ktime_get(); rc = svc_rdma_bc_sendto(rdma, rqst, ctxt); - if (rc) { - svc_rdma_send_ctxt_put(rdma, ctxt); - goto drop_connection; - } + if (rc) + goto put_ctxt; return 0; +put_ctxt: + svc_rdma_send_ctxt_put(rdma, ctxt); + drop_connection: dprintk("svcrdma: failed to send bc call\n"); return -ENOTCONN; @@ -250,6 +252,7 @@ xprt_rdma_bc_put(struct rpc_xprt *xprt) { dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); + xprt_rdma_free_addresses(xprt); xprt_free(xprt); } diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 96bccd398469..54469b72b25f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -193,6 +193,7 @@ svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma) out: ctxt->rc_page_count = 0; + ctxt->rc_read_payload_length = 0; return ctxt; out_empty: @@ -357,15 +358,14 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, arg->len = ctxt->rc_byte_len; } -/* This accommodates the largest possible Write chunk, - * in one segment. +/* This accommodates the largest possible Write chunk. */ -#define MAX_BYTES_WRITE_SEG ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT)) +#define MAX_BYTES_WRITE_CHUNK ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT)) /* This accommodates the largest possible Position-Zero - * Read chunk or Reply chunk, in one segment. + * Read chunk or Reply chunk. */ -#define MAX_BYTES_SPECIAL_SEG ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT)) +#define MAX_BYTES_SPECIAL_CHUNK ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT)) /* Sanity check the Read list. * @@ -373,7 +373,7 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, * - This implementation supports only one Read chunk. * * Sanity checks: - * - Read list does not overflow buffer. + * - Read list does not overflow Receive buffer. * - Segment size limited by largest NFS data payload. * * The segment count is limited to how many segments can @@ -381,30 +381,44 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, * buffer. That's about 40 Read segments for a 1KB inline * threshold. * - * Returns pointer to the following Write list. + * Return values: + * %true: Read list is valid. @rctxt's xdr_stream is updated + * to point to the first byte past the Read list. + * %false: Read list is corrupt. @rctxt's xdr_stream is left + * in an unknown state. */ -static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end) +static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt) { - u32 position; + u32 position, len; bool first; + __be32 *p; + + p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); + if (!p) + return false; + len = 0; first = true; - while (*p++ != xdr_zero) { + while (*p != xdr_zero) { + p = xdr_inline_decode(&rctxt->rc_stream, + rpcrdma_readseg_maxsz * sizeof(*p)); + if (!p) + return false; + if (first) { - position = be32_to_cpup(p++); + position = be32_to_cpup(p); first = false; - } else if (be32_to_cpup(p++) != position) { - return NULL; + } else if (be32_to_cpup(p) != position) { + return false; } - p++; /* handle */ - if (be32_to_cpup(p++) > MAX_BYTES_SPECIAL_SEG) - return NULL; - p += 2; /* offset */ + p += 2; + len += be32_to_cpup(p); - if (p > end) - return NULL; + p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); + if (!p) + return false; } - return p; + return len <= MAX_BYTES_SPECIAL_CHUNK; } /* The segment count is limited to how many segments can @@ -412,67 +426,100 @@ static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end) * buffer. That's about 60 Write segments for a 1KB inline * threshold. */ -static __be32 *xdr_check_write_chunk(__be32 *p, const __be32 *end, - u32 maxlen) +static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen) { - u32 i, segcount; + u32 i, segcount, total; + __be32 *p; + + p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); + if (!p) + return false; + segcount = be32_to_cpup(p); - segcount = be32_to_cpup(p++); + total = 0; for (i = 0; i < segcount; i++) { - p++; /* handle */ - if (be32_to_cpup(p++) > maxlen) - return NULL; - p += 2; /* offset */ + u32 handle, length; + u64 offset; - if (p > end) - return NULL; - } + p = xdr_inline_decode(&rctxt->rc_stream, + rpcrdma_segment_maxsz * sizeof(*p)); + if (!p) + return false; + + handle = be32_to_cpup(p++); + length = be32_to_cpup(p++); + xdr_decode_hyper(p, &offset); + trace_svcrdma_decode_wseg(handle, length, offset); - return p; + total += length; + } + return total <= maxlen; } /* Sanity check the Write list. * * Implementation limits: - * - This implementation supports only one Write chunk. + * - This implementation currently supports only one Write chunk. * * Sanity checks: - * - Write list does not overflow buffer. - * - Segment size limited by largest NFS data payload. - * - * Returns pointer to the following Reply chunk. + * - Write list does not overflow Receive buffer. + * - Chunk size limited by largest NFS data payload. + * + * Return values: + * %true: Write list is valid. @rctxt's xdr_stream is updated + * to point to the first byte past the Write list. + * %false: Write list is corrupt. @rctxt's xdr_stream is left + * in an unknown state. */ -static __be32 *xdr_check_write_list(__be32 *p, const __be32 *end) +static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt) { - u32 chcount; + u32 chcount = 0; + __be32 *p; - chcount = 0; - while (*p++ != xdr_zero) { - p = xdr_check_write_chunk(p, end, MAX_BYTES_WRITE_SEG); + p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); + if (!p) + return false; + rctxt->rc_write_list = p; + while (*p != xdr_zero) { + if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK)) + return false; + ++chcount; + p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); if (!p) - return NULL; - if (chcount++ > 1) - return NULL; + return false; } - return p; + if (!chcount) + rctxt->rc_write_list = NULL; + return chcount < 2; } /* Sanity check the Reply chunk. * * Sanity checks: - * - Reply chunk does not overflow buffer. - * - Segment size limited by largest NFS data payload. - * - * Returns pointer to the following RPC header. + * - Reply chunk does not overflow Receive buffer. + * - Chunk size limited by largest NFS data payload. + * + * Return values: + * %true: Reply chunk is valid. @rctxt's xdr_stream is updated + * to point to the first byte past the Reply chunk. + * %false: Reply chunk is corrupt. @rctxt's xdr_stream is left + * in an unknown state. */ -static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end) +static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt) { - if (*p++ != xdr_zero) { - p = xdr_check_write_chunk(p, end, MAX_BYTES_SPECIAL_SEG); - if (!p) - return NULL; + __be32 *p; + + p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); + if (!p) + return false; + rctxt->rc_reply_chunk = p; + if (*p != xdr_zero) { + if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK)) + return false; + } else { + rctxt->rc_reply_chunk = NULL; } - return p; + return true; } /* RPC-over-RDMA Version One private extension: Remote Invalidation. @@ -537,60 +584,61 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma, ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey); } -/* On entry, xdr->head[0].iov_base points to first byte in the - * RPC-over-RDMA header. +/** + * svc_rdma_xdr_decode_req - Decode the transport header + * @rq_arg: xdr_buf containing ingress RPC/RDMA message + * @rctxt: state of decoding + * + * On entry, xdr->head[0].iov_base points to first byte of the + * RPC-over-RDMA transport header. * * On successful exit, head[0] points to first byte past the * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message. + * * The length of the RPC-over-RDMA header is returned. * * Assumptions: * - The transport header is entirely contained in the head iovec. */ -static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg) +static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg, + struct svc_rdma_recv_ctxt *rctxt) { - __be32 *p, *end, *rdma_argp; + __be32 *p, *rdma_argp; unsigned int hdr_len; - /* Verify that there's enough bytes for header + something */ - if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) - goto out_short; - rdma_argp = rq_arg->head[0].iov_base; - if (*(rdma_argp + 1) != rpcrdma_version) - goto out_version; + xdr_init_decode(&rctxt->rc_stream, rq_arg, rdma_argp, NULL); - switch (*(rdma_argp + 3)) { + p = xdr_inline_decode(&rctxt->rc_stream, + rpcrdma_fixed_maxsz * sizeof(*p)); + if (unlikely(!p)) + goto out_short; + p++; + if (*p != rpcrdma_version) + goto out_version; + p += 2; + switch (*p) { case rdma_msg: break; case rdma_nomsg: break; - case rdma_done: goto out_drop; - case rdma_error: goto out_drop; - default: goto out_proc; } - end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len); - p = xdr_check_read_list(rdma_argp + 4, end); - if (!p) - goto out_inval; - p = xdr_check_write_list(p, end); - if (!p) + if (!xdr_check_read_list(rctxt)) goto out_inval; - p = xdr_check_reply_chunk(p, end); - if (!p) + if (!xdr_check_write_list(rctxt)) goto out_inval; - if (p > end) + if (!xdr_check_reply_chunk(rctxt)) goto out_inval; - rq_arg->head[0].iov_base = p; - hdr_len = (unsigned long)p - (unsigned long)rdma_argp; + rq_arg->head[0].iov_base = rctxt->rc_stream.p; + hdr_len = xdr_stream_pos(&rctxt->rc_stream); rq_arg->head[0].iov_len -= hdr_len; rq_arg->len -= hdr_len; trace_svcrdma_decode_rqst(rdma_argp, hdr_len); @@ -650,7 +698,6 @@ static void svc_rdma_send_error(struct svcxprt_rdma *xprt, __be32 *rdma_argp, int status) { struct svc_rdma_send_ctxt *ctxt; - unsigned int length; __be32 *p; int ret; @@ -658,29 +705,46 @@ static void svc_rdma_send_error(struct svcxprt_rdma *xprt, if (!ctxt) return; - p = ctxt->sc_xprt_buf; + p = xdr_reserve_space(&ctxt->sc_stream, + rpcrdma_fixed_maxsz * sizeof(*p)); + if (!p) + goto put_ctxt; + *p++ = *rdma_argp; *p++ = *(rdma_argp + 1); *p++ = xprt->sc_fc_credits; - *p++ = rdma_error; + *p = rdma_error; + switch (status) { case -EPROTONOSUPPORT: + p = xdr_reserve_space(&ctxt->sc_stream, 3 * sizeof(*p)); + if (!p) + goto put_ctxt; + *p++ = err_vers; *p++ = rpcrdma_version; - *p++ = rpcrdma_version; + *p = rpcrdma_version; trace_svcrdma_err_vers(*rdma_argp); break; default: - *p++ = err_chunk; + p = xdr_reserve_space(&ctxt->sc_stream, sizeof(*p)); + if (!p) + goto put_ctxt; + + *p = err_chunk; trace_svcrdma_err_chunk(*rdma_argp); } - length = (unsigned long)p - (unsigned long)ctxt->sc_xprt_buf; - svc_rdma_sync_reply_hdr(xprt, ctxt, length); + ctxt->sc_send_wr.num_sge = 1; ctxt->sc_send_wr.opcode = IB_WR_SEND; + ctxt->sc_sges[0].length = ctxt->sc_hdrbuf.len; ret = svc_rdma_send(xprt, &ctxt->sc_send_wr); if (ret) - svc_rdma_send_ctxt_put(xprt, ctxt); + goto put_ctxt; + return; + +put_ctxt: + svc_rdma_send_ctxt_put(xprt, ctxt); } /* By convention, backchannel calls arrive via rdma_msg type @@ -785,7 +849,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) rqstp->rq_next_page = rqstp->rq_respages; p = (__be32 *)rqstp->rq_arg.head[0].iov_base; - ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg); + ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt); if (ret < 0) goto out_err; if (ret == 0) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 48fe3b16b0d9..bd7c195d872e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -41,7 +41,7 @@ struct svc_rdma_rw_ctxt { struct rdma_rw_ctx rw_ctx; int rw_nents; struct sg_table rw_sg_table; - struct scatterlist rw_first_sgl[0]; + struct scatterlist rw_first_sgl[]; }; static inline struct svc_rdma_rw_ctxt * @@ -439,7 +439,8 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, if (ret < 0) goto out_initerr; - trace_svcrdma_encode_wseg(seg_handle, write_len, seg_offset); + trace_svcrdma_send_wseg(seg_handle, write_len, seg_offset); + list_add(&ctxt->rw_list, &cc->cc_rwctxts); cc->cc_sqecount += ret; if (write_len == seg_length - info->wi_seg_off) { @@ -482,18 +483,19 @@ static int svc_rdma_send_xdr_kvec(struct svc_rdma_write_info *info, vec->iov_len); } -/* Send an xdr_buf's page list by itself. A Write chunk is - * just the page list. a Reply chunk is the head, page list, - * and tail. This function is shared between the two types - * of chunk. +/* Send an xdr_buf's page list by itself. A Write chunk is just + * the page list. A Reply chunk is @xdr's head, page list, and + * tail. This function is shared between the two types of chunk. */ static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info, - struct xdr_buf *xdr) + struct xdr_buf *xdr, + unsigned int offset, + unsigned long length) { info->wi_xdr = xdr; - info->wi_next_off = 0; + info->wi_next_off = offset - xdr->head[0].iov_len; return svc_rdma_build_writes(info, svc_rdma_pagelist_to_sg, - xdr->page_len); + length); } /** @@ -501,6 +503,8 @@ static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info, * @rdma: controlling RDMA transport * @wr_ch: Write chunk provided by client * @xdr: xdr_buf containing the data payload + * @offset: payload's byte offset in @xdr + * @length: size of payload, in bytes * * Returns a non-negative number of bytes the chunk consumed, or * %-E2BIG if the payload was larger than the Write chunk, @@ -510,19 +514,20 @@ static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info, * %-EIO if rdma_rw initialization failed (DMA mapping, etc). */ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch, - struct xdr_buf *xdr) + struct xdr_buf *xdr, + unsigned int offset, unsigned long length) { struct svc_rdma_write_info *info; int ret; - if (!xdr->page_len) + if (!length) return 0; info = svc_rdma_write_info_alloc(rdma, wr_ch); if (!info) return -ENOMEM; - ret = svc_rdma_send_xdr_pagelist(info, xdr); + ret = svc_rdma_send_xdr_pagelist(info, xdr, offset, length); if (ret < 0) goto out_err; @@ -530,8 +535,8 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch, if (ret < 0) goto out_err; - trace_svcrdma_encode_write(xdr->page_len); - return xdr->page_len; + trace_svcrdma_send_write_chunk(xdr->page_len); + return length; out_err: svc_rdma_write_info_free(info); @@ -541,8 +546,7 @@ out_err: /** * svc_rdma_send_reply_chunk - Write all segments in the Reply chunk * @rdma: controlling RDMA transport - * @rp_ch: Reply chunk provided by client - * @writelist: true if client provided a Write list + * @rctxt: Write and Reply chunks from client * @xdr: xdr_buf containing an RPC Reply * * Returns a non-negative number of bytes the chunk consumed, or @@ -552,13 +556,14 @@ out_err: * %-ENOTCONN if posting failed (connection is lost), * %-EIO if rdma_rw initialization failed (DMA mapping, etc). */ -int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch, - bool writelist, struct xdr_buf *xdr) +int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + struct xdr_buf *xdr) { struct svc_rdma_write_info *info; int consumed, ret; - info = svc_rdma_write_info_alloc(rdma, rp_ch); + info = svc_rdma_write_info_alloc(rdma, rctxt->rc_reply_chunk); if (!info) return -ENOMEM; @@ -570,8 +575,10 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch, /* Send the page list in the Reply chunk only if the * client did not provide Write chunks. */ - if (!writelist && xdr->page_len) { - ret = svc_rdma_send_xdr_pagelist(info, xdr); + if (!rctxt->rc_write_list && xdr->page_len) { + ret = svc_rdma_send_xdr_pagelist(info, xdr, + xdr->head[0].iov_len, + xdr->page_len); if (ret < 0) goto out_err; consumed += xdr->page_len; @@ -588,7 +595,7 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch, if (ret < 0) goto out_err; - trace_svcrdma_encode_reply(consumed); + trace_svcrdma_send_reply_chunk(consumed); return consumed; out_err: @@ -691,7 +698,7 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, if (ret < 0) break; - trace_svcrdma_encode_rseg(rs_handle, rs_length, rs_offset); + trace_svcrdma_send_rseg(rs_handle, rs_length, rs_offset); info->ri_chunklen += rs_length; } @@ -722,7 +729,7 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp, if (ret < 0) goto out; - trace_svcrdma_encode_read(info->ri_chunklen, info->ri_position); + trace_svcrdma_send_read_chunk(info->ri_chunklen, info->ri_position); head->rc_hdr_count = 0; @@ -778,7 +785,7 @@ static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp, if (ret < 0) goto out; - trace_svcrdma_encode_pzr(info->ri_chunklen); + trace_svcrdma_send_pzr(info->ri_chunklen); head->rc_arg.len += info->ri_chunklen; head->rc_arg.buflen += info->ri_chunklen; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index f3f108090aa4..90cba3058f04 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -151,6 +151,8 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; ctxt->sc_cqe.done = svc_rdma_wc_send; ctxt->sc_xprt_buf = buffer; + xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, + rdma->sc_max_req_size); ctxt->sc_sges[0].addr = addr; for (i = 0; i < rdma->sc_max_send_sges; i++) @@ -204,6 +206,10 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma) spin_unlock(&rdma->sc_send_lock); out: + rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0); + xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf, + ctxt->sc_xprt_buf, NULL); + ctxt->sc_send_wr.num_sge = 0; ctxt->sc_cur_sge_no = 0; ctxt->sc_page_count = 0; @@ -295,6 +301,12 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr) might_sleep(); + /* Sync the transport header buffer */ + ib_dma_sync_single_for_device(rdma->sc_pd->device, + wr->sg_list[0].addr, + wr->sg_list[0].length, + DMA_TO_DEVICE); + /* If the SQ is full, wait until an SQ entry is available */ while (1) { if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) { @@ -322,166 +334,173 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr) return ret; } -static u32 xdr_padsize(u32 len) +/** + * svc_rdma_encode_read_list - Encode RPC Reply's Read chunk list + * @sctxt: Send context for the RPC Reply + * + * Return values: + * On success, returns length in bytes of the Reply XDR buffer + * that was consumed by the Reply Read list + * %-EMSGSIZE on XDR buffer overflow + */ +static ssize_t svc_rdma_encode_read_list(struct svc_rdma_send_ctxt *sctxt) { - return (len & 3) ? (4 - (len & 3)) : 0; + /* RPC-over-RDMA version 1 replies never have a Read list. */ + return xdr_stream_encode_item_absent(&sctxt->sc_stream); } -/* Returns length of transport header, in bytes. +/** + * svc_rdma_encode_write_segment - Encode one Write segment + * @src: matching Write chunk in the RPC Call header + * @sctxt: Send context for the RPC Reply + * @remaining: remaining bytes of the payload left in the Write chunk + * + * Return values: + * On success, returns length in bytes of the Reply XDR buffer + * that was consumed by the Write segment + * %-EMSGSIZE on XDR buffer overflow */ -static unsigned int svc_rdma_reply_hdr_len(__be32 *rdma_resp) +static ssize_t svc_rdma_encode_write_segment(__be32 *src, + struct svc_rdma_send_ctxt *sctxt, + unsigned int *remaining) { - unsigned int nsegs; __be32 *p; - - p = rdma_resp; - - /* RPC-over-RDMA V1 replies never have a Read list. */ - p += rpcrdma_fixed_maxsz + 1; - - /* Skip Write list. */ - while (*p++ != xdr_zero) { - nsegs = be32_to_cpup(p++); - p += nsegs * rpcrdma_segment_maxsz; - } - - /* Skip Reply chunk. */ - if (*p++ != xdr_zero) { - nsegs = be32_to_cpup(p++); - p += nsegs * rpcrdma_segment_maxsz; + const size_t len = rpcrdma_segment_maxsz * sizeof(*p); + u32 handle, length; + u64 offset; + + p = xdr_reserve_space(&sctxt->sc_stream, len); + if (!p) + return -EMSGSIZE; + + handle = be32_to_cpup(src++); + length = be32_to_cpup(src++); + xdr_decode_hyper(src, &offset); + + *p++ = cpu_to_be32(handle); + if (*remaining < length) { + /* segment only partly filled */ + length = *remaining; + *remaining = 0; + } else { + /* entire segment was consumed */ + *remaining -= length; } + *p++ = cpu_to_be32(length); + xdr_encode_hyper(p, offset); - return (unsigned long)p - (unsigned long)rdma_resp; + trace_svcrdma_encode_wseg(handle, length, offset); + return len; } -/* One Write chunk is copied from Call transport header to Reply - * transport header. Each segment's length field is updated to - * reflect number of bytes consumed in the segment. - * - * Returns number of segments in this chunk. +/** + * svc_rdma_encode_write_chunk - Encode one Write chunk + * @src: matching Write chunk in the RPC Call header + * @sctxt: Send context for the RPC Reply + * @remaining: size in bytes of the payload in the Write chunk + * + * Copy a Write chunk from the Call transport header to the + * Reply transport header. Update each segment's length field + * to reflect the number of bytes written in that segment. + * + * Return values: + * On success, returns length in bytes of the Reply XDR buffer + * that was consumed by the Write chunk + * %-EMSGSIZE on XDR buffer overflow */ -static unsigned int xdr_encode_write_chunk(__be32 *dst, __be32 *src, +static ssize_t svc_rdma_encode_write_chunk(__be32 *src, + struct svc_rdma_send_ctxt *sctxt, unsigned int remaining) { unsigned int i, nsegs; - u32 seg_len; + ssize_t len, ret; - /* Write list discriminator */ - *dst++ = *src++; + len = 0; + trace_svcrdma_encode_write_chunk(remaining); - /* number of segments in this chunk */ - nsegs = be32_to_cpup(src); - *dst++ = *src++; + src++; + ret = xdr_stream_encode_item_present(&sctxt->sc_stream); + if (ret < 0) + return -EMSGSIZE; + len += ret; - for (i = nsegs; i; i--) { - /* segment's RDMA handle */ - *dst++ = *src++; - - /* bytes returned in this segment */ - seg_len = be32_to_cpu(*src); - if (remaining >= seg_len) { - /* entire segment was consumed */ - *dst = *src; - remaining -= seg_len; - } else { - /* segment only partly filled */ - *dst = cpu_to_be32(remaining); - remaining = 0; - } - dst++; src++; + nsegs = be32_to_cpup(src++); + ret = xdr_stream_encode_u32(&sctxt->sc_stream, nsegs); + if (ret < 0) + return -EMSGSIZE; + len += ret; - /* segment's RDMA offset */ - *dst++ = *src++; - *dst++ = *src++; + for (i = nsegs; i; i--) { + ret = svc_rdma_encode_write_segment(src, sctxt, &remaining); + if (ret < 0) + return -EMSGSIZE; + src += rpcrdma_segment_maxsz; + len += ret; } - return nsegs; + return len; } -/* The client provided a Write list in the Call message. Fill in - * the segments in the first Write chunk in the Reply's transport +/** + * svc_rdma_encode_write_list - Encode RPC Reply's Write chunk list + * @rctxt: Reply context with information about the RPC Call + * @sctxt: Send context for the RPC Reply + * @length: size in bytes of the payload in the first Write chunk + * + * The client provides a Write chunk list in the Call message. Fill + * in the segments in the first Write chunk in the Reply's transport * header with the number of bytes consumed in each segment. * Remaining chunks are returned unused. * * Assumptions: * - Client has provided only one Write chunk - */ -static void svc_rdma_xdr_encode_write_list(__be32 *rdma_resp, __be32 *wr_ch, - unsigned int consumed) -{ - unsigned int nsegs; - __be32 *p, *q; - - /* RPC-over-RDMA V1 replies never have a Read list. */ - p = rdma_resp + rpcrdma_fixed_maxsz + 1; - - q = wr_ch; - while (*q != xdr_zero) { - nsegs = xdr_encode_write_chunk(p, q, consumed); - q += 2 + nsegs * rpcrdma_segment_maxsz; - p += 2 + nsegs * rpcrdma_segment_maxsz; - consumed = 0; - } - - /* Terminate Write list */ - *p++ = xdr_zero; - - /* Reply chunk discriminator; may be replaced later */ - *p = xdr_zero; -} - -/* The client provided a Reply chunk in the Call message. Fill in - * the segments in the Reply chunk in the Reply message with the - * number of bytes consumed in each segment. * - * Assumptions: - * - Reply can always fit in the provided Reply chunk + * Return values: + * On success, returns length in bytes of the Reply XDR buffer + * that was consumed by the Reply's Write list + * %-EMSGSIZE on XDR buffer overflow */ -static void svc_rdma_xdr_encode_reply_chunk(__be32 *rdma_resp, __be32 *rp_ch, - unsigned int consumed) +static ssize_t +svc_rdma_encode_write_list(const struct svc_rdma_recv_ctxt *rctxt, + struct svc_rdma_send_ctxt *sctxt, + unsigned int length) { - __be32 *p; + ssize_t len, ret; - /* Find the Reply chunk in the Reply's xprt header. - * RPC-over-RDMA V1 replies never have a Read list. - */ - p = rdma_resp + rpcrdma_fixed_maxsz + 1; + ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, length); + if (ret < 0) + return ret; + len = ret; - /* Skip past Write list */ - while (*p++ != xdr_zero) - p += 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz; + /* Terminate the Write list */ + ret = xdr_stream_encode_item_absent(&sctxt->sc_stream); + if (ret < 0) + return ret; - xdr_encode_write_chunk(p, rp_ch, consumed); + return len + ret; } -/* Parse the RPC Call's transport header. +/** + * svc_rdma_encode_reply_chunk - Encode RPC Reply's Reply chunk + * @rctxt: Reply context with information about the RPC Call + * @sctxt: Send context for the RPC Reply + * @length: size in bytes of the payload in the Reply chunk + * + * Assumptions: + * - Reply can always fit in the client-provided Reply chunk + * + * Return values: + * On success, returns length in bytes of the Reply XDR buffer + * that was consumed by the Reply's Reply chunk + * %-EMSGSIZE on XDR buffer overflow */ -static void svc_rdma_get_write_arrays(__be32 *rdma_argp, - __be32 **write, __be32 **reply) +static ssize_t +svc_rdma_encode_reply_chunk(const struct svc_rdma_recv_ctxt *rctxt, + struct svc_rdma_send_ctxt *sctxt, + unsigned int length) { - __be32 *p; - - p = rdma_argp + rpcrdma_fixed_maxsz; - - /* Read list */ - while (*p++ != xdr_zero) - p += 5; - - /* Write list */ - if (*p != xdr_zero) { - *write = p; - while (*p++ != xdr_zero) - p += 1 + be32_to_cpu(*p) * 4; - } else { - *write = NULL; - p++; - } - - /* Reply chunk */ - if (*p != xdr_zero) - *reply = p; - else - *reply = NULL; + return svc_rdma_encode_write_chunk(rctxt->rc_reply_chunk, sctxt, + length); } static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma, @@ -520,38 +539,36 @@ static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma, } /** - * svc_rdma_sync_reply_hdr - DMA sync the transport header buffer + * svc_rdma_pull_up_needed - Determine whether to use pull-up * @rdma: controlling transport - * @ctxt: send_ctxt for the Send WR - * @len: length of transport header + * @sctxt: send_ctxt for the Send WR + * @rctxt: Write and Reply chunks provided by client + * @xdr: xdr_buf containing RPC message to transmit * - */ -void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt, - unsigned int len) -{ - ctxt->sc_sges[0].length = len; - ctxt->sc_send_wr.num_sge++; - ib_dma_sync_single_for_device(rdma->sc_pd->device, - ctxt->sc_sges[0].addr, len, - DMA_TO_DEVICE); -} - -/* If the xdr_buf has more elements than the device can - * transmit in a single RDMA Send, then the reply will - * have to be copied into a bounce buffer. + * Returns: + * %true if pull-up must be used + * %false otherwise */ static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma, - struct xdr_buf *xdr, - __be32 *wr_lst) + struct svc_rdma_send_ctxt *sctxt, + const struct svc_rdma_recv_ctxt *rctxt, + struct xdr_buf *xdr) { int elements; + /* For small messages, copying bytes is cheaper than DMA mapping. + */ + if (sctxt->sc_hdrbuf.len + xdr->len < RPCRDMA_PULLUP_THRESH) + return true; + + /* Check whether the xdr_buf has more elements than can + * fit in a single RDMA Send. + */ /* xdr->head */ elements = 1; /* xdr->pages */ - if (!wr_lst) { + if (!rctxt || !rctxt->rc_write_list) { unsigned int remaining; unsigned long pageoff; @@ -573,29 +590,36 @@ static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma, return elements >= rdma->sc_max_send_sges; } -/* The device is not capable of sending the reply directly. - * Assemble the elements of @xdr into the transport header - * buffer. +/** + * svc_rdma_pull_up_reply_msg - Copy Reply into a single buffer + * @rdma: controlling transport + * @sctxt: send_ctxt for the Send WR; xprt hdr is already prepared + * @rctxt: Write and Reply chunks provided by client + * @xdr: prepared xdr_buf containing RPC message + * + * The device is not capable of sending the reply directly. + * Assemble the elements of @xdr into the transport header buffer. + * + * Returns zero on success, or a negative errno on failure. */ static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt, - struct xdr_buf *xdr, __be32 *wr_lst) + struct svc_rdma_send_ctxt *sctxt, + const struct svc_rdma_recv_ctxt *rctxt, + const struct xdr_buf *xdr) { unsigned char *dst, *tailbase; unsigned int taillen; - dst = ctxt->sc_xprt_buf; - dst += ctxt->sc_sges[0].length; - + dst = sctxt->sc_xprt_buf + sctxt->sc_hdrbuf.len; memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len); dst += xdr->head[0].iov_len; tailbase = xdr->tail[0].iov_base; taillen = xdr->tail[0].iov_len; - if (wr_lst) { + if (rctxt && rctxt->rc_write_list) { u32 xdrpad; - xdrpad = xdr_padsize(xdr->page_len); + xdrpad = xdr_pad_size(xdr->page_len); if (taillen && xdrpad) { tailbase += xdrpad; taillen -= xdrpad; @@ -621,29 +645,26 @@ static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma, if (taillen) memcpy(dst, tailbase, taillen); - ctxt->sc_sges[0].length += xdr->len; - ib_dma_sync_single_for_device(rdma->sc_pd->device, - ctxt->sc_sges[0].addr, - ctxt->sc_sges[0].length, - DMA_TO_DEVICE); - + sctxt->sc_sges[0].length += xdr->len; + trace_svcrdma_send_pullup(sctxt->sc_sges[0].length); return 0; } -/* svc_rdma_map_reply_msg - Map the buffer holding RPC message +/* svc_rdma_map_reply_msg - DMA map the buffer holding RPC message * @rdma: controlling transport - * @ctxt: send_ctxt for the Send WR + * @sctxt: send_ctxt for the Send WR + * @rctxt: Write and Reply chunks provided by client * @xdr: prepared xdr_buf containing RPC message - * @wr_lst: pointer to Call header's Write list, or NULL * * Load the xdr_buf into the ctxt's sge array, and DMA map each - * element as it is added. + * element as it is added. The Send WR's num_sge field is set. * * Returns zero on success, or a negative errno on failure. */ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt, - struct xdr_buf *xdr, __be32 *wr_lst) + struct svc_rdma_send_ctxt *sctxt, + const struct svc_rdma_recv_ctxt *rctxt, + struct xdr_buf *xdr) { unsigned int len, remaining; unsigned long page_off; @@ -652,11 +673,24 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, u32 xdr_pad; int ret; - if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst)) - return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst); + /* Set up the (persistently-mapped) transport header SGE. */ + sctxt->sc_send_wr.num_sge = 1; + sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len; - ++ctxt->sc_cur_sge_no; - ret = svc_rdma_dma_map_buf(rdma, ctxt, + /* If there is a Reply chunk, nothing follows the transport + * header, and we're done here. + */ + if (rctxt && rctxt->rc_reply_chunk) + return 0; + + /* For pull-up, svc_rdma_send() will sync the transport header. + * No additional DMA mapping is necessary. + */ + if (svc_rdma_pull_up_needed(rdma, sctxt, rctxt, xdr)) + return svc_rdma_pull_up_reply_msg(rdma, sctxt, rctxt, xdr); + + ++sctxt->sc_cur_sge_no; + ret = svc_rdma_dma_map_buf(rdma, sctxt, xdr->head[0].iov_base, xdr->head[0].iov_len); if (ret < 0) @@ -667,10 +701,10 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, * have added XDR padding in the tail buffer, and that * should not be included inline. */ - if (wr_lst) { + if (rctxt && rctxt->rc_write_list) { base = xdr->tail[0].iov_base; len = xdr->tail[0].iov_len; - xdr_pad = xdr_padsize(xdr->page_len); + xdr_pad = xdr_pad_size(xdr->page_len); if (len && xdr_pad) { base += xdr_pad; @@ -686,8 +720,8 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, while (remaining) { len = min_t(u32, PAGE_SIZE - page_off, remaining); - ++ctxt->sc_cur_sge_no; - ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++, + ++sctxt->sc_cur_sge_no; + ret = svc_rdma_dma_map_page(rdma, sctxt, *ppages++, page_off, len); if (ret < 0) return ret; @@ -700,8 +734,8 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, len = xdr->tail[0].iov_len; tail: if (len) { - ++ctxt->sc_cur_sge_no; - ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len); + ++sctxt->sc_cur_sge_no; + ret = svc_rdma_dma_map_buf(rdma, sctxt, base, len); if (ret < 0) return ret; } @@ -748,18 +782,14 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, */ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, - struct svc_rdma_recv_ctxt *rctxt, - struct svc_rqst *rqstp, - __be32 *wr_lst, __be32 *rp_ch) + const struct svc_rdma_recv_ctxt *rctxt, + struct svc_rqst *rqstp) { int ret; - if (!rp_ch) { - ret = svc_rdma_map_reply_msg(rdma, sctxt, - &rqstp->rq_res, wr_lst); - if (ret < 0) - return ret; - } + ret = svc_rdma_map_reply_msg(rdma, sctxt, rctxt, &rqstp->rq_res); + if (ret < 0) + return ret; svc_rdma_save_io_pages(rqstp, sctxt); @@ -769,8 +799,6 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, } else { sctxt->sc_send_wr.opcode = IB_WR_SEND; } - dprintk("svcrdma: posting Send WR with %u sge(s)\n", - sctxt->sc_send_wr.num_sge); return svc_rdma_send(rdma, &sctxt->sc_send_wr); } @@ -785,26 +813,31 @@ static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt, struct svc_rqst *rqstp) { + struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; + __be32 *rdma_argp = rctxt->rc_recv_buf; __be32 *p; - int ret; - p = ctxt->sc_xprt_buf; - trace_svcrdma_err_chunk(*p); - p += 3; + rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0); + xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, + NULL); + + p = xdr_reserve_space(&ctxt->sc_stream, RPCRDMA_HDRLEN_ERR); + if (!p) + return -ENOMSG; + + *p++ = *rdma_argp; + *p++ = *(rdma_argp + 1); + *p++ = rdma->sc_fc_credits; *p++ = rdma_error; *p = err_chunk; - svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_ERR); + trace_svcrdma_err_chunk(*rdma_argp); svc_rdma_save_io_pages(rqstp, ctxt); + ctxt->sc_send_wr.num_sge = 1; ctxt->sc_send_wr.opcode = IB_WR_SEND; - ret = svc_rdma_send(rdma, &ctxt->sc_send_wr); - if (ret) { - svc_rdma_send_ctxt_put(rdma, ctxt); - return ret; - } - - return 0; + ctxt->sc_sges[0].length = ctxt->sc_hdrbuf.len; + return svc_rdma_send(rdma, &ctxt->sc_send_wr); } /** @@ -825,14 +858,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) struct svcxprt_rdma *rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; - __be32 *p, *rdma_argp, *rdma_resp, *wr_lst, *rp_ch; + __be32 *rdma_argp = rctxt->rc_recv_buf; + __be32 *wr_lst = rctxt->rc_write_list; + __be32 *rp_ch = rctxt->rc_reply_chunk; struct xdr_buf *xdr = &rqstp->rq_res; struct svc_rdma_send_ctxt *sctxt; + __be32 *p; int ret; - rdma_argp = rctxt->rc_recv_buf; - svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch); - /* Create the RDMA response header. xprt->xpt_mutex, * acquired in svc_send(), serializes RPC replies. The * code path below that inserts the credit grant value @@ -843,36 +876,52 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) sctxt = svc_rdma_send_ctxt_get(rdma); if (!sctxt) goto err0; - rdma_resp = sctxt->sc_xprt_buf; - p = rdma_resp; + p = xdr_reserve_space(&sctxt->sc_stream, + rpcrdma_fixed_maxsz * sizeof(*p)); + if (!p) + goto err0; *p++ = *rdma_argp; *p++ = *(rdma_argp + 1); *p++ = rdma->sc_fc_credits; - *p++ = rp_ch ? rdma_nomsg : rdma_msg; - - /* Start with empty chunks */ - *p++ = xdr_zero; - *p++ = xdr_zero; - *p = xdr_zero; + *p = rp_ch ? rdma_nomsg : rdma_msg; + if (svc_rdma_encode_read_list(sctxt) < 0) + goto err0; if (wr_lst) { /* XXX: Presume the client sent only one Write chunk */ - ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr); + unsigned long offset; + unsigned int length; + + if (rctxt->rc_read_payload_length) { + offset = rctxt->rc_read_payload_offset; + length = rctxt->rc_read_payload_length; + } else { + offset = xdr->head[0].iov_len; + length = xdr->page_len; + } + ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr, offset, + length); if (ret < 0) goto err2; - svc_rdma_xdr_encode_write_list(rdma_resp, wr_lst, ret); + if (svc_rdma_encode_write_list(rctxt, sctxt, length) < 0) + goto err0; + } else { + if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0) + goto err0; } if (rp_ch) { - ret = svc_rdma_send_reply_chunk(rdma, rp_ch, wr_lst, xdr); + ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res); if (ret < 0) goto err2; - svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret); + if (svc_rdma_encode_reply_chunk(rctxt, sctxt, ret) < 0) + goto err0; + } else { + if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0) + goto err0; } - svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp)); - ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp, - wr_lst, rp_ch); + ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp); if (ret < 0) goto err1; ret = 0; @@ -900,3 +949,30 @@ out: ret = -ENOTCONN; goto out; } + +/** + * svc_rdma_read_payload - special processing for a READ payload + * @rqstp: svc_rqst to operate on + * @offset: payload's byte offset in @xdr + * @length: size of payload, in bytes + * + * Returns zero on success. + * + * For the moment, just record the xdr_buf location of the READ + * payload. svc_rdma_sendto will use that location later when + * we actually send the payload. + */ +int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset, + unsigned int length) +{ + struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; + + /* XXX: Just one READ payload slot for now, since our + * transport implementation currently supports only one + * Write chunk. + */ + rctxt->rc_read_payload_offset = offset; + rctxt->rc_read_payload_length = length; + + return 0; +} diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 145a3615c319..8bb99980ae85 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -82,6 +82,7 @@ static const struct svc_xprt_ops svc_rdma_ops = { .xpo_create = svc_rdma_create, .xpo_recvfrom = svc_rdma_recvfrom, .xpo_sendto = svc_rdma_sendto, + .xpo_read_payload = svc_rdma_read_payload, .xpo_release_rqst = svc_rdma_release_rqst, .xpo_detach = svc_rdma_detach, .xpo_free = svc_rdma_free, @@ -240,10 +241,6 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, static int rdma_listen_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { - struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.src_addr; - - trace_svcrdma_cm_event(event, sap); - switch (event->event) { case RDMA_CM_EVENT_CONNECT_REQUEST: dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " @@ -265,12 +262,9 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, static int rdma_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { - struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.dst_addr; struct svcxprt_rdma *rdma = cma_id->context; struct svc_xprt *xprt = &rdma->sc_xprt; - trace_svcrdma_cm_event(event, sap); - switch (event->event) { case RDMA_CM_EVENT_ESTABLISHED: /* Accept complete */ |