From 8502427ccd9500cefc1ad47655371f9121934845 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:02:04 -0500 Subject: xprtrdma: human-readable completion status Make it easier to grep the system log for specific error conditions. The wc.opcode field is not included because opcode numbers are sparse, and because wc.opcode is not necessarily valid when completion reports an error. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 70 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 13 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index c98e40643910..56f705d63d5c 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -173,18 +173,54 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) } } +static const char * const wc_status[] = { + "success", + "local length error", + "local QP operation error", + "local EE context operation error", + "local protection error", + "WR flushed", + "memory management operation error", + "bad response error", + "local access error", + "remote invalid request error", + "remote access error", + "remote operation error", + "transport retry counter exceeded", + "RNR retrycounter exceeded", + "local RDD violation error", + "remove invalid RD request", + "operation aborted", + "invalid EE context number", + "invalid EE context state", + "fatal error", + "response timeout error", + "general error", +}; + +#define COMPLETION_MSG(status) \ + ((status) < ARRAY_SIZE(wc_status) ? \ + wc_status[(status)] : "unexpected completion error") + static void rpcrdma_sendcq_process_wc(struct ib_wc *wc) { - struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; + if (likely(wc->status == IB_WC_SUCCESS)) + return; - dprintk("RPC: %s: frmr %p status %X opcode %d\n", - __func__, frmr, wc->status, wc->opcode); + /* WARNING: Only wr_id and status are reliable at this point */ + if (wc->wr_id == 0ULL) { + if (wc->status != IB_WC_WR_FLUSH_ERR) + pr_err("RPC: %s: SEND: %s\n", + __func__, COMPLETION_MSG(wc->status)); + } else { + struct rpcrdma_mw *r; - if (wc->wr_id == 0ULL) - return; - if (wc->status != IB_WC_SUCCESS) - frmr->r.frmr.fr_state = FRMR_IS_STALE; + r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; + r->r.frmr.fr_state = FRMR_IS_STALE; + pr_err("RPC: %s: frmr %p (stale): %s\n", + __func__, r, COMPLETION_MSG(wc->status)); + } } static int @@ -248,16 +284,17 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) struct rpcrdma_rep *rep = (struct rpcrdma_rep *)(unsigned long)wc->wr_id; - dprintk("RPC: %s: rep %p status %X opcode %X length %u\n", - __func__, rep, wc->status, wc->opcode, wc->byte_len); + /* WARNING: Only wr_id and status are reliable at this point */ + if (wc->status != IB_WC_SUCCESS) + goto out_fail; - if (wc->status != IB_WC_SUCCESS) { - rep->rr_len = ~0U; - goto out_schedule; - } + /* status == SUCCESS means all fields in wc are trustworthy */ if (wc->opcode != IB_WC_RECV) return; + dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n", + __func__, rep, wc->byte_len); + rep->rr_len = wc->byte_len; ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); @@ -275,6 +312,13 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) out_schedule: list_add_tail(&rep->rr_list, sched_list); + return; +out_fail: + if (wc->status != IB_WC_WR_FLUSH_ERR) + pr_err("RPC: %s: rep %p: %s\n", + __func__, rep, COMPLETION_MSG(wc->status)); + rep->rr_len = ~0U; + goto out_schedule; } static int -- cgit From 5d410ba061c1e4bc0068ce91f2cf349998cde46c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:02:46 -0500 Subject: xprtrdma: Remove rpcrdma_ep::rep_ia Clean up: This field is not used. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 56f705d63d5c..56e14b369d42 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -825,7 +825,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, else if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; INIT_CQCOUNT(ep); - ep->rep_ia = ia; init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); -- cgit From 3eb358106660195948f4e95822039c5799fc41f8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:02:54 -0500 Subject: xprtrdma: Remove rl_mr field, and the mr_chunk union Clean up: Since commit 0ac531c18323 ("xprtrdma: Remove REGISTER memory registration mode"), the rl_mr pointer is no longer used anywhere. After removal, there's only a single member of the mr_chunk union, so mr_chunk can be removed as well, in favor of a single pointer field. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 56e14b369d42..1000f637edee 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1493,8 +1493,8 @@ rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) int i; for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++) - rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf); - rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf); + rpcrdma_buffer_put_mr(&seg->rl_mw, buf); + rpcrdma_buffer_put_mr(&seg1->rl_mw, buf); } static void @@ -1580,7 +1580,7 @@ rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf, list_add(&r->mw_list, stale); continue; } - req->rl_segments[i].mr_chunk.rl_mw = r; + req->rl_segments[i].rl_mw = r; if (unlikely(i-- == 0)) return req; /* Success */ } @@ -1602,7 +1602,7 @@ rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) r = list_entry(buf->rb_mws.next, struct rpcrdma_mw, mw_list); list_del(&r->mw_list); - req->rl_segments[i].mr_chunk.rl_mw = r; + req->rl_segments[i].rl_mw = r; if (unlikely(i-- == 0)) return req; /* Success */ } @@ -1842,7 +1842,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, struct rpcrdma_xprt *r_xprt) { struct rpcrdma_mr_seg *seg1 = seg; - struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw; + struct rpcrdma_mw *mw = seg1->rl_mw; struct rpcrdma_frmr *frmr = &mw->r.frmr; struct ib_mr *mr = frmr->fr_mr; struct ib_send_wr fastreg_wr, *bad_wr; @@ -1931,12 +1931,12 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, struct ib_send_wr invalidate_wr, *bad_wr; int rc; - seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; + seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; memset(&invalidate_wr, 0, sizeof invalidate_wr); - invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; + invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; + invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); read_lock(&ia->ri_qplock); @@ -1946,7 +1946,7 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, read_unlock(&ia->ri_qplock); if (rc) { /* Force rpcrdma_buffer_get() to retry */ - seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_STALE; + seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; dprintk("RPC: %s: failed ib_post_send for invalidate," " status %i\n", __func__, rc); } @@ -1978,8 +1978,7 @@ rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) break; } - rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, - physaddrs, i, seg1->mr_dma); + rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma); if (rc) { dprintk("RPC: %s: failed ib_map_phys_fmr " "%u@0x%llx+%i (%d)... status %i\n", __func__, @@ -1988,7 +1987,7 @@ rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, while (i--) rpcrdma_unmap_one(ia, --seg); } else { - seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; + seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey; seg1->mr_base = seg1->mr_dma + pageoff; seg1->mr_nsegs = i; seg1->mr_len = len; @@ -2005,7 +2004,7 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, LIST_HEAD(l); int rc; - list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l); + list_add(&seg1->rl_mw->r.fmr->list, &l); rc = ib_unmap_fmr(&l); read_lock(&ia->ri_qplock); while (seg1->mr_nsegs--) -- cgit From eba8ff660b2d8b7fcd6669fcab2c025b59f66d26 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:03:02 -0500 Subject: xprtrdma: Move credit update to RPC reply handler Reduce work in the receive CQ handler, which can be run at hardware interrupt level, by moving the RPC/RDMA credit update logic to the RPC reply handler. This has some additional benefits: More header sanity checking is done before trusting the incoming credit value, and the receive CQ handler no longer touches the RPC/RDMA header (the CPU stalls while waiting for the header contents to be brought into the cache). This further extends work begun by commit e7ce710a8802 ("xprtrdma: Avoid deadlock when credit window is reset"). Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 1000f637edee..71a071aaf0ab 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -49,6 +49,7 @@ #include #include +#include #include #include "xprt_rdma.h" @@ -298,17 +299,7 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) rep->rr_len = wc->byte_len; ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); - - if (rep->rr_len >= 16) { - struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base; - unsigned int credits = ntohl(p->rm_credit); - - if (credits == 0) - credits = 1; /* don't deadlock */ - else if (credits > rep->rr_buffer->rb_max_requests) - credits = rep->rr_buffer->rb_max_requests; - atomic_set(&rep->rr_buffer->rb_credits, credits); - } + prefetch(rep->rr_base); out_schedule: list_add_tail(&rep->rr_list, sched_list); @@ -480,7 +471,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DEVICE_REMOVAL: connstate = -ENODEV; connected: - atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1); dprintk("RPC: %s: %sconnected\n", __func__, connstate > 0 ? "" : "dis"); ep->rep_connected = connstate; @@ -1186,7 +1176,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, buf->rb_max_requests = cdata->max_requests; spin_lock_init(&buf->rb_lock); - atomic_set(&buf->rb_credits, 1); /* Need to allocate: * 1. arrays for send and recv pointers -- cgit From afadc468eb309b7c48ffdc8fa4c72acbb9991613 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:03:11 -0500 Subject: xprtrdma: Remove rpcrdma_ep::rep_func and ::rep_xprt Clean up: The rep_func field always refers to rpcrdma_conn_func(). rep_func should have been removed by commit b45ccfd25d50 ("xprtrdma: Remove MEMWINDOWS registration modes"). Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 71a071aaf0ab..c61bb61c4d13 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -154,7 +154,7 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) event->device->name, context); if (ep->rep_connected == 1) { ep->rep_connected = -EIO; - ep->rep_func(ep); + rpcrdma_conn_func(ep); wake_up_all(&ep->rep_connect_wait); } } @@ -169,7 +169,7 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) event->device->name, context); if (ep->rep_connected == 1) { ep->rep_connected = -EIO; - ep->rep_func(ep); + rpcrdma_conn_func(ep); wake_up_all(&ep->rep_connect_wait); } } @@ -474,7 +474,7 @@ connected: dprintk("RPC: %s: %sconnected\n", __func__, connstate > 0 ? "" : "dis"); ep->rep_connected = connstate; - ep->rep_func(ep); + rpcrdma_conn_func(ep); wake_up_all(&ep->rep_connect_wait); /*FALLTHROUGH*/ default: -- cgit From 5ae711a24601257f395c1f8746ac95be0cbd75e5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:03:19 -0500 Subject: xprtrdma: Free the pd if ib_query_qp() fails If ib_query_qp() fails or the memory registration mode isn't supported, don't leak the PD. An orphaned IB/core resource will cause IB module removal to hang. Fixes: bd7ed1d13304 ("RPC/RDMA: check selected memory registration ...") Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index c61bb61c4d13..aa012a393448 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -614,7 +614,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) if (rc) { dprintk("RPC: %s: ib_query_device failed %d\n", __func__, rc); - goto out2; + goto out3; } if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { @@ -672,14 +672,14 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) "phys register failed with %lX\n", __func__, PTR_ERR(ia->ri_bind_mem)); rc = -ENOMEM; - goto out2; + goto out3; } break; default: printk(KERN_ERR "RPC: Unsupported memory " "registration mode: %d\n", memreg); rc = -ENOMEM; - goto out2; + goto out3; } dprintk("RPC: %s: memory registration strategy is %d\n", __func__, memreg); @@ -689,6 +689,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) rwlock_init(&ia->ri_qplock); return 0; + +out3: + ib_dealloc_pd(ia->ri_pd); + ia->ri_pd = NULL; out2: rdma_destroy_id(ia->ri_id); ia->ri_id = NULL; -- cgit From 7bc7972cdd1f137552ca979caa11c8acbe119ae8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:03:27 -0500 Subject: xprtrdma: Take struct ib_device_attr off the stack Device attributes are large, and are used in more than one place. Stash a copy in dynamically allocated memory. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index aa012a393448..123bb04dd823 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -588,8 +588,8 @@ int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) { int rc, mem_priv; - struct ib_device_attr devattr; struct rpcrdma_ia *ia = &xprt->rx_ia; + struct ib_device_attr *devattr = &ia->ri_devattr; ia->ri_id = rpcrdma_create_id(xprt, ia, addr); if (IS_ERR(ia->ri_id)) { @@ -605,26 +605,21 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) goto out2; } - /* - * Query the device to determine if the requested memory - * registration strategy is supported. If it isn't, set the - * strategy to a globally supported model. - */ - rc = ib_query_device(ia->ri_id->device, &devattr); + rc = ib_query_device(ia->ri_id->device, devattr); if (rc) { dprintk("RPC: %s: ib_query_device failed %d\n", __func__, rc); goto out3; } - if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { + if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { ia->ri_have_dma_lkey = 1; ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; } if (memreg == RPCRDMA_FRMR) { /* Requires both frmr reg and local dma lkey */ - if ((devattr.device_cap_flags & + if ((devattr->device_cap_flags & (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { dprintk("RPC: %s: FRMR registration " @@ -634,7 +629,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) /* Mind the ia limit on FRMR page list depth */ ia->ri_max_frmr_depth = min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, - devattr.max_fast_reg_page_list_len); + devattr->max_fast_reg_page_list_len); } } if (memreg == RPCRDMA_MTHCAFMR) { @@ -736,20 +731,13 @@ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { - struct ib_device_attr devattr; + struct ib_device_attr *devattr = &ia->ri_devattr; struct ib_cq *sendcq, *recvcq; int rc, err; - rc = ib_query_device(ia->ri_id->device, &devattr); - if (rc) { - dprintk("RPC: %s: ib_query_device failed %d\n", - __func__, rc); - return rc; - } - /* check provider's send/recv wr limits */ - if (cdata->max_requests > devattr.max_qp_wr) - cdata->max_requests = devattr.max_qp_wr; + if (cdata->max_requests > devattr->max_qp_wr) + cdata->max_requests = devattr->max_qp_wr; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.qp_context = ep; @@ -784,8 +772,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, } ep->rep_attr.cap.max_send_wr *= depth; - if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { - cdata->max_requests = devattr.max_qp_wr / depth; + if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { + cdata->max_requests = devattr->max_qp_wr / depth; if (!cdata->max_requests) return -EINVAL; ep->rep_attr.cap.max_send_wr = cdata->max_requests * @@ -868,10 +856,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; - if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ + if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */ ep->rep_remote_cma.responder_resources = 32; else - ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; + ep->rep_remote_cma.responder_resources = + devattr->max_qp_rd_atom; ep->rep_remote_cma.retry_count = 7; ep->rep_remote_cma.flow_control = 0; -- cgit From ce1ab9ab47973dcff7548abda20e49add2c4ca95 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:03:35 -0500 Subject: xprtrdma: Take struct ib_qp_attr and ib_qp_init_attr off the stack Reduce stack footprint of the connection upcall handler function. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 123bb04dd823..958b372cb919 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -425,8 +425,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; #endif - struct ib_qp_attr attr; - struct ib_qp_init_attr iattr; + struct ib_qp_attr *attr = &ia->ri_qp_attr; + struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; int connstate = 0; switch (event->event) { @@ -449,12 +449,13 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) break; case RDMA_CM_EVENT_ESTABLISHED: connstate = 1; - ib_query_qp(ia->ri_id->qp, &attr, - IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC, - &iattr); + ib_query_qp(ia->ri_id->qp, attr, + IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC, + iattr); dprintk("RPC: %s: %d responder resources" " (%d initiator)\n", - __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic); + __func__, attr->max_dest_rd_atomic, + attr->max_rd_atomic); goto connected; case RDMA_CM_EVENT_CONNECT_ERROR: connstate = -ENOTCONN; @@ -487,7 +488,7 @@ connected: #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (connstate == 1) { - int ird = attr.max_dest_rd_atomic; + int ird = attr->max_dest_rd_atomic; int tird = ep->rep_remote_cma.responder_resources; printk(KERN_INFO "rpcrdma: connection to %pI4:%u " "on %s, memreg %d slots %d ird %d%s\n", -- cgit From ac920d04a7f307bfd7633f60abe33fb626f6ec83 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:03:44 -0500 Subject: xprtrdma: Simplify synopsis of rpcrdma_buffer_create() Clean up: There is one call site for rpcrdma_buffer_create(). All of the arguments there are fields of an rpcrdma_xprt. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 958b372cb919..fd71501403fd 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1161,9 +1161,11 @@ out_free: } int -rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, - struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) +rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) { + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; char *p; size_t len, rlen, wlen; int i, rc; @@ -1200,6 +1202,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, * Register the zeroed pad buffer, if any. */ if (cdata->padding) { + struct rpcrdma_ep *ep = &r_xprt->rx_ep; rc = rpcrdma_register_internal(ia, p, cdata->padding, &ep->rep_pad_mr, &ep->rep_pad); if (rc) -- cgit From 1392402c405a75de1cdc658d36c6007ea1c037de Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:03:52 -0500 Subject: xprtrdma: Refactor rpcrdma_buffer_create() and rpcrdma_buffer_destroy() Move the details of how to create and destroy rpcrdma_req and rpcrdma_rep structures into helper functions. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 148 ++++++++++++++++++++++++++++---------------- 1 file changed, 95 insertions(+), 53 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index fd71501403fd..24ea6dd184e4 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1075,6 +1075,69 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) } } +static struct rpcrdma_req * +rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; + size_t wlen = 1 << fls(cdata->inline_wsize + + sizeof(struct rpcrdma_req)); + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_req *req; + int rc; + + rc = -ENOMEM; + req = kmalloc(wlen, GFP_KERNEL); + if (req == NULL) + goto out; + memset(req, 0, sizeof(struct rpcrdma_req)); + + rc = rpcrdma_register_internal(ia, req->rl_base, wlen - + offsetof(struct rpcrdma_req, rl_base), + &req->rl_handle, &req->rl_iov); + if (rc) + goto out_free; + + req->rl_size = wlen - sizeof(struct rpcrdma_req); + req->rl_buffer = &r_xprt->rx_buf; + return req; + +out_free: + kfree(req); +out: + return ERR_PTR(rc); +} + +static struct rpcrdma_rep * +rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; + size_t rlen = 1 << fls(cdata->inline_rsize + + sizeof(struct rpcrdma_rep)); + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_rep *rep; + int rc; + + rc = -ENOMEM; + rep = kmalloc(rlen, GFP_KERNEL); + if (rep == NULL) + goto out; + memset(rep, 0, sizeof(struct rpcrdma_rep)); + + rc = rpcrdma_register_internal(ia, rep->rr_base, rlen - + offsetof(struct rpcrdma_rep, rr_base), + &rep->rr_handle, &rep->rr_iov); + if (rc) + goto out_free; + + rep->rr_buffer = &r_xprt->rx_buf; + return rep; + +out_free: + kfree(rep); +out: + return ERR_PTR(rc); +} + static int rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) { @@ -1167,7 +1230,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; char *p; - size_t len, rlen, wlen; + size_t len; int i, rc; buf->rb_max_requests = cdata->max_requests; @@ -1227,68 +1290,55 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) break; } - /* - * Allocate/init the request/reply buffers. Doing this - * using kmalloc for now -- one for each buf. - */ - wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req)); - rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep)); - dprintk("RPC: %s: wlen = %zu, rlen = %zu\n", - __func__, wlen, rlen); - for (i = 0; i < buf->rb_max_requests; i++) { struct rpcrdma_req *req; struct rpcrdma_rep *rep; - req = kmalloc(wlen, GFP_KERNEL); - if (req == NULL) { + req = rpcrdma_create_req(r_xprt); + if (IS_ERR(req)) { dprintk("RPC: %s: request buffer %d alloc" " failed\n", __func__, i); - rc = -ENOMEM; + rc = PTR_ERR(req); goto out; } - memset(req, 0, sizeof(struct rpcrdma_req)); buf->rb_send_bufs[i] = req; - buf->rb_send_bufs[i]->rl_buffer = buf; - - rc = rpcrdma_register_internal(ia, req->rl_base, - wlen - offsetof(struct rpcrdma_req, rl_base), - &buf->rb_send_bufs[i]->rl_handle, - &buf->rb_send_bufs[i]->rl_iov); - if (rc) - goto out; - buf->rb_send_bufs[i]->rl_size = wlen - - sizeof(struct rpcrdma_req); - - rep = kmalloc(rlen, GFP_KERNEL); - if (rep == NULL) { + rep = rpcrdma_create_rep(r_xprt); + if (IS_ERR(rep)) { dprintk("RPC: %s: reply buffer %d alloc failed\n", __func__, i); - rc = -ENOMEM; + rc = PTR_ERR(rep); goto out; } - memset(rep, 0, sizeof(struct rpcrdma_rep)); buf->rb_recv_bufs[i] = rep; - buf->rb_recv_bufs[i]->rr_buffer = buf; - - rc = rpcrdma_register_internal(ia, rep->rr_base, - rlen - offsetof(struct rpcrdma_rep, rr_base), - &buf->rb_recv_bufs[i]->rr_handle, - &buf->rb_recv_bufs[i]->rr_iov); - if (rc) - goto out; - } - dprintk("RPC: %s: max_requests %d\n", - __func__, buf->rb_max_requests); - /* done */ + return 0; out: rpcrdma_buffer_destroy(buf); return rc; } +static void +rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep) +{ + if (!rep) + return; + + rpcrdma_deregister_internal(ia, rep->rr_handle, &rep->rr_iov); + kfree(rep); +} + +static void +rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) +{ + if (!req) + return; + + rpcrdma_deregister_internal(ia, req->rl_handle, &req->rl_iov); + kfree(req); +} + static void rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf) { @@ -1344,18 +1394,10 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) dprintk("RPC: %s: entering\n", __func__); for (i = 0; i < buf->rb_max_requests; i++) { - if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) { - rpcrdma_deregister_internal(ia, - buf->rb_recv_bufs[i]->rr_handle, - &buf->rb_recv_bufs[i]->rr_iov); - kfree(buf->rb_recv_bufs[i]); - } - if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { - rpcrdma_deregister_internal(ia, - buf->rb_send_bufs[i]->rl_handle, - &buf->rb_send_bufs[i]->rl_iov); - kfree(buf->rb_send_bufs[i]); - } + if (buf->rb_recv_bufs) + rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]); + if (buf->rb_send_bufs) + rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); } switch (ia->ri_memreg_strategy) { -- cgit From 9128c3e794a77917a86dd5490ca2c5233a8c6fde Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:04:00 -0500 Subject: xprtrdma: Add struct rpcrdma_regbuf and helpers There are several spots that allocate a buffer via kmalloc (usually contiguously with another data structure) and then register that buffer internally. I'd like to split the buffers out of these data structures to allow the data structures to scale. Start by adding functions that can kmalloc and register a buffer, and can manage/preserve the buffer's associated ib_sge and ib_mr fields. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 55 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 24ea6dd184e4..cdd6aacc9168 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1828,6 +1828,61 @@ rpcrdma_deregister_internal(struct rpcrdma_ia *ia, return rc; } +/** + * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers + * @ia: controlling rpcrdma_ia + * @size: size of buffer to be allocated, in bytes + * @flags: GFP flags + * + * Returns pointer to private header of an area of internally + * registered memory, or an ERR_PTR. The registered buffer follows + * the end of the private header. + * + * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for + * receiving the payload of RDMA RECV operations. regbufs are not + * used for RDMA READ/WRITE operations, thus are registered only for + * LOCAL access. + */ +struct rpcrdma_regbuf * +rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags) +{ + struct rpcrdma_regbuf *rb; + int rc; + + rc = -ENOMEM; + rb = kmalloc(sizeof(*rb) + size, flags); + if (rb == NULL) + goto out; + + rb->rg_size = size; + rb->rg_owner = NULL; + rc = rpcrdma_register_internal(ia, rb->rg_base, size, + &rb->rg_mr, &rb->rg_iov); + if (rc) + goto out_free; + + return rb; + +out_free: + kfree(rb); +out: + return ERR_PTR(rc); +} + +/** + * rpcrdma_free_regbuf - deregister and free registered buffer + * @ia: controlling rpcrdma_ia + * @rb: regbuf to be deregistered and freed + */ +void +rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) +{ + if (rb) { + rpcrdma_deregister_internal(ia, rb->rg_mr, &rb->rg_iov); + kfree(rb); + } +} + /* * Wrappers for chunk registration, shared by read/write chunk code. */ -- cgit From 0ca77dc372110cbed4dbac5e867ffdc60ebccf6a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:04:08 -0500 Subject: xprtrdma: Allocate RPC send buffer separately from struct rpcrdma_req Because internal memory registration is an expensive and synchronous operation, xprtrdma pre-registers send and receive buffers at mount time, and then re-uses them for each RPC. A "hardway" allocation is a memory allocation and registration that replaces a send buffer during the processing of an RPC. Hardway must be done if the RPC send buffer is too small to accommodate an RPC's call and reply headers. For xprtrdma, each RPC send buffer is currently part of struct rpcrdma_req so that xprt_rdma_free(), which is passed nothing but the address of an RPC send buffer, can find its matching struct rpcrdma_req and rpcrdma_rep quickly via container_of / offsetof. That means that hardway currently has to replace a whole rpcrmda_req when it replaces an RPC send buffer. This is often a fairly hefty chunk of contiguous memory due to the size of the rl_segments array and the fact that both the send and receive buffers are part of struct rpcrdma_req. Some obscure re-use of fields in rpcrdma_req is done so that xprt_rdma_free() can detect replaced rpcrdma_req structs, and restore the original. This commit breaks apart the RPC send buffer and struct rpcrdma_req so that increasing the size of the rl_segments array does not change the alignment of each RPC send buffer. (Increasing rl_segments is needed to bump up the maximum r/wsize for NFS/RDMA). This change opens up some interesting possibilities for improving the design of xprt_rdma_allocate(). xprt_rdma_allocate() is now the one place where RPC send buffers are allocated or re-allocated, and they are now always left in place by xprt_rdma_free(). A large re-allocation that includes both the rl_segments array and the RPC send buffer is no longer needed. Send buffer re-allocation becomes quite rare. Good send buffer alignment is guaranteed no matter what the size of the rl_segments array is. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index cdd6aacc9168..40894403db81 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1079,25 +1079,22 @@ static struct rpcrdma_req * rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; - size_t wlen = 1 << fls(cdata->inline_wsize + - sizeof(struct rpcrdma_req)); + size_t wlen = cdata->inline_wsize; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_req *req; int rc; rc = -ENOMEM; - req = kmalloc(wlen, GFP_KERNEL); + req = kmalloc(sizeof(*req) + wlen, GFP_KERNEL); if (req == NULL) goto out; - memset(req, 0, sizeof(struct rpcrdma_req)); + memset(req, 0, sizeof(*req)); - rc = rpcrdma_register_internal(ia, req->rl_base, wlen - - offsetof(struct rpcrdma_req, rl_base), + rc = rpcrdma_register_internal(ia, req->rl_base, wlen, &req->rl_handle, &req->rl_iov); if (rc) goto out_free; - req->rl_size = wlen - sizeof(struct rpcrdma_req); req->rl_buffer = &r_xprt->rx_buf; return req; @@ -1121,7 +1118,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) rep = kmalloc(rlen, GFP_KERNEL); if (rep == NULL) goto out; - memset(rep, 0, sizeof(struct rpcrdma_rep)); + memset(rep, 0, sizeof(*rep)); rc = rpcrdma_register_internal(ia, rep->rr_base, rlen - offsetof(struct rpcrdma_rep, rr_base), @@ -1335,6 +1332,7 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) if (!req) return; + rpcrdma_free_regbuf(ia, req->rl_sendbuf); rpcrdma_deregister_internal(ia, req->rl_handle, &req->rl_iov); kfree(req); } @@ -1729,8 +1727,6 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) struct rpcrdma_buffer *buffers = req->rl_buffer; unsigned long flags; - if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */ - buffers = ((struct rpcrdma_req *) buffers)->rl_buffer; spin_lock_irqsave(&buffers->rb_lock, flags); if (buffers->rb_recv_index < buffers->rb_max_requests) { req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; -- cgit From 85275c874eaeb92fb2a78a1d4ebb1ff4b0f7b732 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:04:16 -0500 Subject: xprtrdma: Allocate RPC/RDMA send buffer separately from struct rpcrdma_req The rl_base field is currently the buffer where each RPC/RDMA call header is built. The inline threshold is an agreed-on size limit to for RDMA SEND operations that pass between client and server. The sum of the RPC/RDMA header size and the RPC header size must be less than or equal to this threshold. Increasing the r/wsize maximum will require MAX_SEGS to grow significantly, but the inline threshold size won't change (both sides agree on it). The server's inline threshold doesn't change. Since an RPC/RDMA header can never be larger than the inline threshold, make all RPC/RDMA header buffers the size of the inline threshold. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 40894403db81..c81749b9a0de 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1078,30 +1078,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) static struct rpcrdma_req * rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) { - struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; - size_t wlen = cdata->inline_wsize; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_req *req; - int rc; - rc = -ENOMEM; - req = kmalloc(sizeof(*req) + wlen, GFP_KERNEL); + req = kzalloc(sizeof(*req), GFP_KERNEL); if (req == NULL) - goto out; - memset(req, 0, sizeof(*req)); - - rc = rpcrdma_register_internal(ia, req->rl_base, wlen, - &req->rl_handle, &req->rl_iov); - if (rc) - goto out_free; + return ERR_PTR(-ENOMEM); req->rl_buffer = &r_xprt->rx_buf; return req; - -out_free: - kfree(req); -out: - return ERR_PTR(rc); } static struct rpcrdma_rep * @@ -1333,7 +1317,7 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) return; rpcrdma_free_regbuf(ia, req->rl_sendbuf); - rpcrdma_deregister_internal(ia, req->rl_handle, &req->rl_iov); + rpcrdma_free_regbuf(ia, req->rl_rdmabuf); kfree(req); } -- cgit From 6b1184cd4fb086a826f658b02d9d9912dd0dde08 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:04:25 -0500 Subject: xprtrdma: Allocate RPC/RDMA receive buffer separately from struct rpcrdma_rep The rr_base field is currently the buffer where RPC replies land. An RPC/RDMA reply header lands in this buffer. In some cases an RPC reply header also lands in this buffer, just after the RPC/RDMA header. The inline threshold is an agreed-on size limit for RDMA SEND operations that pass from server and client. The sum of the RPC/RDMA reply header size and the RPC reply header size must be less than this threshold. The largest RDMA RECV that the client should have to handle is the size of the inline threshold. The receive buffer should thus be the size of the inline threshold, and not related to RPCRDMA_MAX_SEGS. RPC replies received via RDMA WRITE (long replies) are caught in rq_rcv_buf, which is the second half of the RPC send buffer. Ie, such replies are not involved in any way with rr_base. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index c81749b9a0de..f58521dd88e2 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -298,8 +298,9 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) rep->rr_len = wc->byte_len; ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, - rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); - prefetch(rep->rr_base); + rdmab_addr(rep->rr_rdmabuf), + rep->rr_len, DMA_FROM_DEVICE); + prefetch(rdmab_to_msg(rep->rr_rdmabuf)); out_schedule: list_add_tail(&rep->rr_list, sched_list); @@ -1092,23 +1093,21 @@ static struct rpcrdma_rep * rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; - size_t rlen = 1 << fls(cdata->inline_rsize + - sizeof(struct rpcrdma_rep)); struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_rep *rep; int rc; rc = -ENOMEM; - rep = kmalloc(rlen, GFP_KERNEL); + rep = kzalloc(sizeof(*rep), GFP_KERNEL); if (rep == NULL) goto out; - memset(rep, 0, sizeof(*rep)); - rc = rpcrdma_register_internal(ia, rep->rr_base, rlen - - offsetof(struct rpcrdma_rep, rr_base), - &rep->rr_handle, &rep->rr_iov); - if (rc) + rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize, + GFP_KERNEL); + if (IS_ERR(rep->rr_rdmabuf)) { + rc = PTR_ERR(rep->rr_rdmabuf); goto out_free; + } rep->rr_buffer = &r_xprt->rx_buf; return rep; @@ -1306,7 +1305,7 @@ rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep) if (!rep) return; - rpcrdma_deregister_internal(ia, rep->rr_handle, &rep->rr_iov); + rpcrdma_free_regbuf(ia, rep->rr_rdmabuf); kfree(rep); } @@ -2209,11 +2208,13 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, recv_wr.next = NULL; recv_wr.wr_id = (u64) (unsigned long) rep; - recv_wr.sg_list = &rep->rr_iov; + recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; recv_wr.num_sge = 1; ib_dma_sync_single_for_cpu(ia->ri_id->device, - rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); + rdmab_addr(rep->rr_rdmabuf), + rdmab_length(rep->rr_rdmabuf), + DMA_BIDIRECTIONAL); rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); -- cgit From c05fbb5a593571961fdb4ba06a2bff49aed9dcee Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:04:33 -0500 Subject: xprtrdma: Allocate zero pad separately from rpcrdma_buffer Use the new rpcrdma_alloc_regbuf() API to shrink the amount of contiguous memory needed for a buffer pool by moving the zero pad buffer into a regbuf. This is for consistency with the other uses of internally registered memory. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index f58521dd88e2..8a05f45d1a11 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -794,6 +794,14 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.qp_type = IB_QPT_RC; ep->rep_attr.port_num = ~0; + if (cdata->padding) { + ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding, + GFP_KERNEL); + if (IS_ERR(ep->rep_padbuf)) + return PTR_ERR(ep->rep_padbuf); + } else + ep->rep_padbuf = NULL; + dprintk("RPC: %s: requested max: dtos: send %d recv %d; " "iovs: send %d recv %d\n", __func__, @@ -876,6 +884,7 @@ out2: dprintk("RPC: %s: ib_destroy_cq returned %i\n", __func__, err); out1: + rpcrdma_free_regbuf(ia, ep->rep_padbuf); return rc; } @@ -902,11 +911,7 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ia->ri_id->qp = NULL; } - /* padding - could be done in rpcrdma_buffer_destroy... */ - if (ep->rep_pad_mr) { - rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); - ep->rep_pad_mr = NULL; - } + rpcrdma_free_regbuf(ia, ep->rep_padbuf); rpcrdma_clean_cq(ep->rep_attr.recv_cq); rc = ib_destroy_cq(ep->rep_attr.recv_cq); @@ -1220,12 +1225,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) * 1. arrays for send and recv pointers * 2. arrays of struct rpcrdma_req to fill in pointers * 3. array of struct rpcrdma_rep for replies - * 4. padding, if any * Send/recv buffers in req/rep need to be registered */ len = buf->rb_max_requests * (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); - len += cdata->padding; p = kzalloc(len, GFP_KERNEL); if (p == NULL) { @@ -1241,18 +1244,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) buf->rb_recv_bufs = (struct rpcrdma_rep **) p; p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; - /* - * Register the zeroed pad buffer, if any. - */ - if (cdata->padding) { - struct rpcrdma_ep *ep = &r_xprt->rx_ep; - rc = rpcrdma_register_internal(ia, p, cdata->padding, - &ep->rep_pad_mr, &ep->rep_pad); - if (rc) - goto out; - } - p += cdata->padding; - INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_all); switch (ia->ri_memreg_strategy) { -- cgit From df515ca7b3b47bf6fd489fe6fca0d9ab243e1985 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:04:41 -0500 Subject: xprtrdma: Clean up after adding regbuf management rpcrdma_{de}register_internal() are used only in verbs.c now. MAX_RPCRDMAHDR is no longer used and can be removed. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8a05f45d1a11..124676c13780 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1729,7 +1729,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) * Wrappers for internal-use kmalloc memory registration, used by buffer code. */ -int +static int rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, struct ib_mr **mrp, struct ib_sge *iov) { @@ -1780,7 +1780,7 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, return rc; } -int +static int rpcrdma_deregister_internal(struct rpcrdma_ia *ia, struct ib_mr *mr, struct ib_sge *iov) { -- cgit