From bdb2ce82818577ba6e57b7d68b698b8d17329281 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 19 Apr 2020 20:03:05 -0400 Subject: xprtrdma: Fix trace point use-after-free race It's not safe to use resources pointed to by the @send_wr of ib_post_send() _after_ that function returns. Those resources are typically freed by the Send completion handler, which can run before ib_post_send() returns. Thus the trace points currently around ib_post_send() in the client's RPC/RDMA transport are a hazard, even when they are disabled. Rearrange them so that they touch the Work Request only _before_ ib_post_send() is invoked. Fixes: ab03eff58eb5 ("xprtrdma: Add trace points in RPC Call transmit paths") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 051f26fedc4d..72f043876019 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -692,11 +692,10 @@ TRACE_EVENT(xprtrdma_prepsend_failed, TRACE_EVENT(xprtrdma_post_send, TP_PROTO( - const struct rpcrdma_req *req, - int status + const struct rpcrdma_req *req ), - TP_ARGS(req, status), + TP_ARGS(req), TP_STRUCT__entry( __field(const void *, req) @@ -705,7 +704,6 @@ TRACE_EVENT(xprtrdma_post_send, __field(unsigned int, client_id) __field(int, num_sge) __field(int, signaled) - __field(int, status) ), TP_fast_assign( @@ -718,15 +716,13 @@ TRACE_EVENT(xprtrdma_post_send, __entry->sc = req->rl_sendctx; __entry->num_sge = req->rl_wr.num_sge; __entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED; - __entry->status = status; ), - TP_printk("task:%u@%u req=%p sc=%p (%d SGE%s) %sstatus=%d", + TP_printk("task:%u@%u req=%p sc=%p (%d SGE%s) %s", __entry->task_id, __entry->client_id, __entry->req, __entry->sc, __entry->num_sge, (__entry->num_sge == 1 ? "" : "s"), - (__entry->signaled ? "signaled " : ""), - __entry->status + (__entry->signaled ? "signaled" : "") ) ); -- cgit From 7c4310ff56422ea43418305d22bbc5fe19150ec4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 3 Apr 2020 14:33:41 +1100 Subject: SUNRPC: defer slow parts of rpc_free_client() to a workqueue. The rpciod workqueue is on the write-out path for freeing dirty memory, so it is important that it never block waiting for memory to be allocated - this can lead to a deadlock. rpc_execute() - which is often called by an rpciod work item - calls rcp_task_release_client() which can lead to rpc_free_client(). rpc_free_client() makes two calls which could potentially block wating for memory allocation. rpc_clnt_debugfs_unregister() calls into debugfs and will block while any of the debugfs files are being accessed. In particular it can block while any of the 'open' methods are being called and all of these use malloc for one thing or another. So this can deadlock if the memory allocation waits for NFS to complete some writes via rpciod. rpc_clnt_remove_pipedir() can take the inode_lock() and while it isn't obvious that memory allocations can happen while the lock it held, it is safer to assume they might and to not let rpciod call rpc_clnt_remove_pipedir(). So this patch moves these two calls (together with the final kfree() and rpciod_down()) into a work-item to be run from the system work-queue. rpciod can continue its important work, and the final stages of the free can happen whenever they happen. I have seen this deadlock on a 4.12 based kernel where debugfs used synchronize_srcu() when removing objects. synchronize_srcu() requires a workqueue and there were no free workther threads and none could be allocated. While debugsfs no longer uses SRCU, I believe the deadlock is still possible. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ca7e108248e2..7bd124e06b36 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -71,7 +71,13 @@ struct rpc_clnt { #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct dentry *cl_debugfs; /* debugfs directory */ #endif - struct rpc_xprt_iter cl_xpi; + /* cl_work is only needed after cl_xpi is no longer used, + * and that are of similar size + */ + union { + struct rpc_xprt_iter cl_xpi; + struct work_struct cl_work; + }; const struct cred *cl_cred; }; -- cgit From dff58530c4ca8ce7ee5a74db431c6e35362cf682 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 24 Apr 2020 17:45:50 -0400 Subject: NFSv4.1: fix handling of backchannel binding in BIND_CONN_TO_SESSION Currently, if the client sends BIND_CONN_TO_SESSION with NFS4_CDFC4_FORE_OR_BOTH but only gets NFS4_CDFS4_FORE back it ignores that it wasn't able to enable a backchannel. To make sure, the client sends BIND_CONN_TO_SESSION as the first operation on the connections (ie., no other session compounds haven't been sent before), and if the client's request to bind the backchannel is not satisfied, then reset the connection and retry. Cc: stable@vger.kernel.org Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 ++ include/linux/sunrpc/clnt.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 440230488025..e5f3e7d8d3d5 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1317,11 +1317,13 @@ struct nfs41_impl_id { struct nfstime4 date; }; +#define MAX_BIND_CONN_TO_SESSION_RETRIES 3 struct nfs41_bind_conn_to_session_args { struct nfs_client *client; struct nfs4_sessionid sessionid; u32 dir; bool use_conn_in_rdma_mode; + int retries; }; struct nfs41_bind_conn_to_session_res { diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 7bd124e06b36..02e7a5863d28 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -242,4 +242,9 @@ static inline int rpc_reply_expected(struct rpc_task *task) (task->tk_msg.rpc_proc->p_decode != NULL); } +static inline void rpc_task_close_connection(struct rpc_task *task) +{ + if (task->tk_xprt) + xprt_force_disconnect(task->tk_xprt); +} #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit