From 691b45ddbd182a4ce0bc91953c70c845cf0935f1 Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Mon, 19 Aug 2019 18:36:19 -0400
Subject: SUNRPC: Remove rpc_wake_up_queued_task_on_wq()

Clean up: commit c544577daddb ("SUNRPC: Clean up transport write space
handling") appears to have removed the last caller of
rpc_wake_up_queued_task_on_wq().

Signed-off-by: Chuck Lever
Signed-off-by: Anna Schumaker
---
 include/linux/sunrpc/sched.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index baa3ecdb882f..d1283bddd218 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -243,9 +243,6 @@ void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *queue,
 void rpc_sleep_on_priority(struct rpc_wait_queue *,
                         struct rpc_task *,
                         int priority);
-void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
-                        struct rpc_wait_queue *queue,
-                        struct rpc_task *task);
 void rpc_wake_up_queued_task(struct rpc_wait_queue *,
                         struct rpc_task *);
 void rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *,
-- cgit

From aeaed4848234c97fb720b0e51a0c56dc8de0eeed Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Mon, 19 Aug 2019 18:40:58 -0400
Subject: xprtrdma: Boost client's max slot table size to match Linux server

I've heard rumors of an NFS/RDMA server implementation that has a
default credit limit of 1024. The client's default setting remains
at 128.

Signed-off-by: Chuck Lever
Signed-off-by: Anna Schumaker
---
 include/linux/sunrpc/xprtrdma.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
index 86fc38ff0355..16c239e0d6dd 100644
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -49,9 +49,9 @@
  * fully-chunked NFS message (read chunks are the largest). Note only
  * a single chunk type per message is supported currently.
  */
-#define RPCRDMA_MIN_SLOT_TABLE  (2U)
+#define RPCRDMA_MIN_SLOT_TABLE  (4U)
 #define RPCRDMA_DEF_SLOT_TABLE  (128U)
-#define RPCRDMA_MAX_SLOT_TABLE  (256U)
+#define RPCRDMA_MAX_SLOT_TABLE  (16384U)

 #define RPCRDMA_MIN_INLINE  (1024)      /* min inline thresh */
 #define RPCRDMA_DEF_INLINE  (4096)      /* default inline thresh */
-- cgit
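The two defines above only widen the permitted range; the slot count actually
used is still whatever the mount or module configuration requests, clamped
between RPCRDMA_MIN_SLOT_TABLE and RPCRDMA_MAX_SLOT_TABLE. A minimal
user-space sketch of that clamping, reusing the values from the hunk above
(the clamp helper and the sample inputs are illustrative, not the kernel's
code):

#include <stdio.h>

/* Bounds taken from the patched include/linux/sunrpc/xprtrdma.h. */
#define RPCRDMA_MIN_SLOT_TABLE  (4U)
#define RPCRDMA_DEF_SLOT_TABLE  (128U)
#define RPCRDMA_MAX_SLOT_TABLE  (16384U)

/* Clamp a requested slot count into the supported range. */
static unsigned int clamp_slot_table(unsigned int requested)
{
        if (requested < RPCRDMA_MIN_SLOT_TABLE)
                return RPCRDMA_MIN_SLOT_TABLE;
        if (requested > RPCRDMA_MAX_SLOT_TABLE)
                return RPCRDMA_MAX_SLOT_TABLE;
        return requested;
}

int main(void)
{
        /* Sample requests: below the minimum, the default, a
         * 1024-credit server, and something past the new ceiling.
         */
        unsigned int samples[] = { 2, RPCRDMA_DEF_SLOT_TABLE, 1024, 65536 };
        unsigned int i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
                printf("requested %5u -> using %5u slots\n",
                       samples[i], clamp_slot_table(samples[i]));
        return 0;
}

Under the old 256-slot ceiling a request for 1024 slots would have been cut
back to 256; with the raised ceiling it passes through, so the client can
match a 1024-credit server.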
From 2dfdcd88cf0ea66eec0478de82283ef20eb6f421 Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Mon, 19 Aug 2019 18:41:44 -0400
Subject: xprtrdma: Rename CQE field in Receive trace points

Make the field name the same for all trace points that handle pointers
to struct rpcrdma_rep. That makes it easy to grep for matching rep
points in trace output.

Signed-off-by: Chuck Lever
Signed-off-by: Anna Schumaker
---
 include/trace/events/rpcrdma.h | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index f6a4eaa85a3e..6e6055eb67e7 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -623,21 +623,21 @@ TRACE_EVENT(xprtrdma_post_send,

 TRACE_EVENT(xprtrdma_post_recv,
         TP_PROTO(
-                const struct ib_cqe *cqe
+                const struct rpcrdma_rep *rep
         ),

-        TP_ARGS(cqe),
+        TP_ARGS(rep),

         TP_STRUCT__entry(
-                __field(const void *, cqe)
+                __field(const void *, rep)
         ),

         TP_fast_assign(
-                __entry->cqe = cqe;
+                __entry->rep = rep;
         ),

-        TP_printk("cqe=%p",
-                __entry->cqe
+        TP_printk("rep=%p",
+                __entry->rep
         )
 );

@@ -715,14 +715,15 @@ TRACE_EVENT(xprtrdma_wc_receive,
         TP_ARGS(wc),

         TP_STRUCT__entry(
-                __field(const void *, cqe)
+                __field(const void *, rep)
                 __field(u32, byte_len)
                 __field(unsigned int, status)
                 __field(u32, vendor_err)
         ),

         TP_fast_assign(
-                __entry->cqe = wc->wr_cqe;
+                __entry->rep = container_of(wc->wr_cqe, struct rpcrdma_rep,
+                                            rr_cqe);
                 __entry->status = wc->status;
                 if (wc->status) {
                         __entry->byte_len = 0;
@@ -733,8 +734,8 @@ TRACE_EVENT(xprtrdma_wc_receive,
                 }
         ),

-        TP_printk("cqe=%p %u bytes: %s (%u/0x%x)",
-                __entry->cqe, __entry->byte_len,
+        TP_printk("rep=%p %u bytes: %s (%u/0x%x)",
+                __entry->rep, __entry->byte_len,
                 rdma_show_wc_status(__entry->status),
                 __entry->status, __entry->vendor_err
         )
-- cgit
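The interesting line in the second hunk is the container_of() conversion: the
work completion only carries the address of the embedded ib_cqe, and the
trace point steps back from that member to the enclosing rpcrdma_rep. A
self-contained sketch of the same technique, using simplified stand-in
structures rather than the kernel definitions:

#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins; the real struct ib_cqe and struct rpcrdma_rep
 * carry more members than shown here.
 */
struct ib_cqe {
        void *done;                     /* completion handler (simplified) */
};

struct rpcrdma_rep {
        unsigned int rr_wc_flags;
        struct ib_cqe rr_cqe;           /* embedded completion entry */
};

/* Same idea as the kernel's container_of(): step back from the address
 * of an embedded member to the structure that contains it.
 */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
        struct rpcrdma_rep rep = { .rr_wc_flags = 0 };
        struct ib_cqe *wr_cqe = &rep.rr_cqe;    /* what the completion carries */
        struct rpcrdma_rep *found;

        /* The reworked xprtrdma_wc_receive assignment takes the same step. */
        found = container_of(wr_cqe, struct rpcrdma_rep, rr_cqe);

        printf("rep=%p recovered=%p match=%s\n",
               (void *)&rep, (void *)found, found == &rep ? "yes" : "no");
        return 0;
}

Since every Receive trace point now records the same rep pointer, one grep
for a given rep= value follows that Receive buffer from posting through
completion.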
From 3b39f52a02d4b3322744a0a32d59142e01afa435 Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Mon, 19 Aug 2019 18:45:37 -0400
Subject: xprtrdma: Move rpcrdma_mr_get out of frwr_map

Refactor: Retrieve an MR and handle error recovery entirely in
rpc_rdma.c, as this is not a device-specific function.

Note that since commit 89f90fe1ad8b ("SUNRPC: Allow calls to
xprt_transmit() to drain the entire transmit queue"), the xprt_transmit
function handles the cond_resched. The transport no longer has to do
this itself.

Signed-off-by: Chuck Lever
Signed-off-by: Anna Schumaker
---
 include/trace/events/rpcrdma.h | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 6e6055eb67e7..83c4dfd7feea 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -464,7 +464,34 @@ TRACE_EVENT(xprtrdma_createmrs,
         )
 );

-DEFINE_RXPRT_EVENT(xprtrdma_nomrs);
+TRACE_EVENT(xprtrdma_nomrs,
+        TP_PROTO(
+                const struct rpcrdma_req *req
+        ),
+
+        TP_ARGS(req),
+
+        TP_STRUCT__entry(
+                __field(const void *, req)
+                __field(unsigned int, task_id)
+                __field(unsigned int, client_id)
+                __field(u32, xid)
+        ),
+
+        TP_fast_assign(
+                const struct rpc_rqst *rqst = &req->rl_slot;
+
+                __entry->req = req;
+                __entry->task_id = rqst->rq_task->tk_pid;
+                __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+                __entry->xid = be32_to_cpu(rqst->rq_xid);
+        ),
+
+        TP_printk("task:%u@%u xid=0x%08x req=%p",
+                __entry->task_id, __entry->client_id, __entry->xid,
+                __entry->req
+        )
+);

 DEFINE_RDCH_EVENT(read);
 DEFINE_WRCH_EVENT(write);
-- cgit

From 6dc6ec9e04c468d994bff6eb660f3146f94cbfd9 Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Mon, 19 Aug 2019 18:47:10 -0400
Subject: xprtrdma: Cache free MRs in each rpcrdma_req

Instead of a globally-contended MR free list, cache MRs in each
rpcrdma_req as they are released. This means acquiring and releasing
an MR will be lock-free in the common case, even outside the transport
send lock.

The original idea of per-rpcrdma_req MR free lists was suggested by
Shirley Ma several years ago. I just now figured out how to make that
idea work with on-demand MR allocation.

Signed-off-by: Chuck Lever
Signed-off-by: Anna Schumaker
---
 include/trace/events/rpcrdma.h | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 83c4dfd7feea..a13830616107 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -451,16 +451,50 @@ TRACE_EVENT(xprtrdma_createmrs,

         TP_STRUCT__entry(
                 __field(const void *, r_xprt)
+                __string(addr, rpcrdma_addrstr(r_xprt))
+                __string(port, rpcrdma_portstr(r_xprt))
                 __field(unsigned int, count)
         ),

         TP_fast_assign(
                 __entry->r_xprt = r_xprt;
                 __entry->count = count;
+                __assign_str(addr, rpcrdma_addrstr(r_xprt));
+                __assign_str(port, rpcrdma_portstr(r_xprt));
         ),

-        TP_printk("r_xprt=%p: created %u MRs",
-                __entry->r_xprt, __entry->count
+        TP_printk("peer=[%s]:%s r_xprt=%p: created %u MRs",
+                __get_str(addr), __get_str(port), __entry->r_xprt,
+                __entry->count
+        )
+);
+
+TRACE_EVENT(xprtrdma_mr_get,
+        TP_PROTO(
+                const struct rpcrdma_req *req
+        ),
+
+        TP_ARGS(req),
+
+        TP_STRUCT__entry(
+                __field(const void *, req)
+                __field(unsigned int, task_id)
+                __field(unsigned int, client_id)
+                __field(u32, xid)
+        ),
+
+        TP_fast_assign(
+                const struct rpc_rqst *rqst = &req->rl_slot;
+
+                __entry->req = req;
+                __entry->task_id = rqst->rq_task->tk_pid;
+                __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+                __entry->xid = be32_to_cpu(rqst->rq_xid);
+        ),
+
+        TP_printk("task:%u@%u xid=0x%08x req=%p",
+                __entry->task_id, __entry->client_id, __entry->xid,
+                __entry->req
         )
 );
-- cgit
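The new xprtrdma_mr_get trace point is the observable part of the change; the
structural idea is that each rpcrdma_req keeps its own small list of free MRs
and reaches for the transport-wide, lock-protected supply (or on-demand
allocation) only when that list runs dry. A rough, self-contained sketch of
the pattern; the structures and list handling below are simplified
placeholders, not the xprtrdma implementation:

#include <stdio.h>

/* Toy MR, request, and pool types; stand-ins only. */
struct mr {
        struct mr *next;
        int id;
};

struct req {
        struct mr *mr_cache;    /* per-request free list, no locking needed */
};

struct pool {
        struct mr *free;        /* shared pool, would be lock-protected */
};

/* Fast path: take an MR cached on the request.  Slow path: refill from
 * the shared pool (in the kernel this is where locking and on-demand
 * MR allocation come in).
 */
static struct mr *mr_get(struct req *req, struct pool *pool)
{
        struct mr *mr = req->mr_cache;

        if (mr) {
                req->mr_cache = mr->next;
                return mr;
        }
        mr = pool->free;
        if (mr)
                pool->free = mr->next;
        return mr;
}

/* Release: park the MR back on the request for its next RPC. */
static void mr_put(struct req *req, struct mr *mr)
{
        mr->next = req->mr_cache;
        req->mr_cache = mr;
}

int main(void)
{
        struct mr mrs[2] = { { NULL, 1 }, { NULL, 2 } };
        struct pool pool = { .free = &mrs[0] };
        struct req req = { .mr_cache = NULL };
        struct mr *mr;

        mrs[0].next = &mrs[1];

        mr = mr_get(&req, &pool);       /* slow path: comes from the pool */
        printf("got MR %d from the pool\n", mr->id);
        mr_put(&req, mr);               /* cached on the request */
        mr = mr_get(&req, &pool);       /* fast path: no shared state touched */
        printf("got MR %d from the request's cache\n", mr->id);
        return 0;
}

In the common case an RPC reuses MRs it released on its own rpcrdma_req, so
neither acquire nor release has to touch shared state.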
From cc204d01262a69218b2d0db5cdea371de85871d9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Tue, 10 Sep 2019 13:01:35 -0400
Subject: SUNRPC: Dequeue the request from the receive queue while we're re-encoding

Ensure that we dequeue the request from the transport receive queue
while we're re-encoding to prevent issues like use-after-free when we
release the bvec.

Fixes: 7536908982047 ("SUNRPC: Ensure the bvecs are reset when we re-encode...")
Signed-off-by: Trond Myklebust
Cc: stable@vger.kernel.org # v4.20+
Signed-off-by: Anna Schumaker
---
 include/linux/sunrpc/xprt.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 13e108bcc9eb..d783e15ba898 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -352,6 +352,7 @@ bool xprt_prepare_transmit(struct rpc_task *task);
 void xprt_request_enqueue_transmit(struct rpc_task *task);
 void xprt_request_enqueue_receive(struct rpc_task *task);
 void xprt_request_wait_receive(struct rpc_task *task);
+void xprt_request_dequeue_xprt(struct rpc_task *task);
 bool xprt_request_need_retransmit(struct rpc_task *task);
 void xprt_transmit(struct rpc_task *task);
 void xprt_end_transmit(struct rpc_task *task);
-- cgit

From f925ab926d1a9c2112d34ecb59fbb050bb58646c Mon Sep 17 00:00:00 2001
From: Benjamin Coddington
Date: Mon, 16 Sep 2019 07:59:38 -0400
Subject: SUNRPC: Rename xdr_buf_read_netobj to xdr_buf_read_mic

Let the name reflect the single use. The function now assumes the GSS
MIC is the last object in the buffer.

Signed-off-by: Benjamin Coddington
Signed-off-by: Anna Schumaker
---
 include/linux/sunrpc/xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 8a87d8bcb197..f33e5013bdfb 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -186,7 +186,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
 extern void xdr_shift_buf(struct xdr_buf *, size_t);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
-extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int);
+extern int xdr_buf_read_mic(struct xdr_buf *, struct xdr_netobj *, unsigned int);
 extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
-- cgit

From 406cd91533dcc5e82ef2373c39e6a531d944131e Mon Sep 17 00:00:00 2001
From: Benjamin Coddington
Date: Fri, 13 Sep 2019 08:29:02 -0400
Subject: NFS: Refactor nfs_instantiate() for dentry referencing callers

Since commit b0c6108ecf64 ("nfs_instantiate(): prevent multiple aliases
for directory inode"), nfs_instantiate() may succeed without actually
instantiating the dentry that was passed in. That can be problematic
for some callers in NFSv3, so this patch breaks things up so we can get
the actual dentry obtained.

Signed-off-by: Benjamin Coddington
Signed-off-by: Anna Schumaker
---
 include/linux/nfs_fs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 0a11712a80e3..570a60c2f4f4 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -490,6 +490,9 @@ extern const struct file_operations nfs_dir_operations;
 extern const struct dentry_operations nfs_dentry_operations;
 extern void nfs_force_lookup_revalidate(struct inode *dir);
+extern struct dentry *nfs_add_or_obtain(struct dentry *dentry,
+                        struct nfs_fh *fh, struct nfs_fattr *fattr,
+                        struct nfs4_label *label);
 extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh,
                         struct nfs_fattr *fattr, struct nfs4_label *label);
 extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags);
-- cgit
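The declaration added above returns a dentry rather than an int, so a caller
can learn which dentry actually ended up referring to the new inode. A hedged
sketch of how such a caller might look, based only on the prototype in this
hunk; the function name, the NULL-return convention (borrowed from
d_splice_alias()), and the follow-up step are assumptions for illustration,
not the actual fs/nfs code:

#include <linux/dcache.h>
#include <linux/err.h>
#include <linux/nfs_fs.h>

/* Hypothetical NFSv3-style caller: instantiate the result of a CREATE
 * and keep a reference to whichever dentry now refers to the inode, so
 * any follow-up operation targets the right alias.  Assumes a NULL
 * return means the dentry passed in was used directly.
 */
static int example_create_done(struct dentry *dentry, struct nfs_fh *fh,
                               struct nfs_fattr *fattr)
{
        struct dentry *alias;

        alias = nfs_add_or_obtain(dentry, fh, fattr, NULL);
        if (IS_ERR(alias))
                return PTR_ERR(alias);
        if (alias)
                dentry = alias; /* an existing alias was obtained instead */

        /* ... follow-up work against 'dentry' would go here ... */

        if (alias)
                dput(alias);    /* drop the reference the helper returned */
        return 0;
}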