From bd5ae9288d6451bd346a1b4a59d4fe7e62ba29b7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 3 Feb 2021 11:42:13 -0500 Subject: nfsd: register pernet ops last, unregister first These pernet operations may depend on stuff set up or torn down in the module init/exit functions. And they may be called at any time in between. So it makes more sense for them to be the last to be registered in the init function, and the first to be unregistered in the exit function. In particular, without this, the drc slab is being destroyed before all the per-net drcs are shut down, resulting in an "Objects remaining in nfsd_drc on __kmem_cache_shutdown()" warning in exit_nfsd. Reported-by: Zhi Li Fixes: 3ba75830ce17 "nfsd4: drc containerization" Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f6d5d783f4a4..0759e589ab52 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1522,12 +1522,9 @@ static int __init init_nfsd(void) int retval; printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); - retval = register_pernet_subsys(&nfsd_net_ops); - if (retval < 0) - return retval; retval = register_cld_notifier(); if (retval) - goto out_unregister_pernet; + return retval; retval = nfsd4_init_slabs(); if (retval) goto out_unregister_notifier; @@ -1544,9 +1541,14 @@ static int __init init_nfsd(void) goto out_free_lockd; retval = register_filesystem(&nfsd_fs_type); if (retval) + goto out_free_exports; + retval = register_pernet_subsys(&nfsd_net_ops); + if (retval < 0) goto out_free_all; return 0; out_free_all: + unregister_pernet_subsys(&nfsd_net_ops); +out_free_exports: remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); out_free_lockd: @@ -1559,13 +1561,12 @@ out_free_slabs: nfsd4_free_slabs(); out_unregister_notifier: unregister_cld_notifier(); -out_unregister_pernet: - unregister_pernet_subsys(&nfsd_net_ops); return retval; } static void __exit exit_nfsd(void) { + unregister_pernet_subsys(&nfsd_net_ops); nfsd_drc_slab_free(); remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); @@ -1575,7 +1576,6 @@ static void __exit exit_nfsd(void) nfsd4_exit_pnfs(); unregister_filesystem(&nfsd_fs_type); unregister_cld_notifier(); - unregister_pernet_subsys(&nfsd_net_ops); } MODULE_AUTHOR("Olaf Kirch "); -- cgit From 0ac24c320c4d89a9de6ec802591398b8675c7b3c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 9 Feb 2021 10:48:57 -0500 Subject: svcrdma: Hold private mutex while invoking rdma_accept() RDMA core mutex locking was restructured by commit d114c6feedfe ("RDMA/cma: Add missing locking to rdma_accept()") [Aug 2020]. When lock debugging is enabled, the RPC/RDMA server trips over the new lockdep assertion in rdma_accept() because it doesn't call rdma_accept() from its CM event handler. As a temporary fix, have svc_rdma_accept() take the handler_mutex explicitly. In the meantime, let's consider how to restructure the RPC/RDMA transport to invoke rdma_accept() from the proper context. Calls to svc_rdma_accept() are serialized with calls to svc_rdma_free() by the generic RPC server layer. Suggested-by: Jason Gunthorpe Link: https://lore.kernel.org/linux-rdma/20210209154014.GO4247@nvidia.com/ Fixes: d114c6feedfe ("RDMA/cma: Add missing locking to rdma_accept()") Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index afba4e9d5425..c895f80df659 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -475,9 +475,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (!svc_rdma_post_recvs(newxprt)) goto errout; - /* Swap out the handler */ - newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler; - /* Construct RDMA-CM private message */ pmsg.cp_magic = rpcrdma_cmp_magic; pmsg.cp_version = RPCRDMA_CMP_VERSION; @@ -498,7 +495,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) } conn_param.private_data = &pmsg; conn_param.private_data_len = sizeof(pmsg); + rdma_lock_handler(newxprt->sc_cm_id); + newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler; ret = rdma_accept(newxprt->sc_cm_id, &conn_param); + rdma_unlock_handler(newxprt->sc_cm_id); if (ret) { trace_svcrdma_accept_err(newxprt, ret); goto errout; -- cgit From e0a912e8ddbaa0536352dd8318845cdfdbab7bab Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Feb 2021 12:17:22 -0500 Subject: SUNRPC: Use TCP_CORK to optimise send performance on the server Use a counter to keep track of how many requests are queued behind the xprt->xpt_mutex, and keep TCP_CORK set until the queue is empty. Signed-off-by: Trond Myklebust Link: https://lore.kernel.org/linux-nfs/20210213202532.23146-1-trondmy@kernel.org/T/#u Signed-off-by: Chuck Lever --- include/linux/sunrpc/svcsock.h | 2 ++ net/sunrpc/svcsock.c | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index b7ac7fe68306..bcc555c7ae9c 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -35,6 +35,8 @@ struct svc_sock { /* Total length of the data (not including fragment headers) * received so far in the fragments making up this rpc: */ u32 sk_datalen; + /* Number of queued send requests */ + atomic_t sk_sendqlen; struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5a809c64dc7b..231f510a4830 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1171,18 +1171,23 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) svc_tcp_release_rqst(rqstp); + atomic_inc(&svsk->sk_sendqlen); mutex_lock(&xprt->xpt_mutex); if (svc_xprt_is_dead(xprt)) goto out_notconn; + tcp_sock_set_cork(svsk->sk_sk, true); err = svc_tcp_sendmsg(svsk->sk_sock, &msg, xdr, marker, &sent); xdr_free_bvec(xdr); trace_svcsock_tcp_send(xprt, err < 0 ? err : sent); if (err < 0 || sent != (xdr->len + sizeof(marker))) goto out_close; + if (atomic_dec_and_test(&svsk->sk_sendqlen)) + tcp_sock_set_cork(svsk->sk_sk, false); mutex_unlock(&xprt->xpt_mutex); return sent; out_notconn: + atomic_dec(&svsk->sk_sendqlen); mutex_unlock(&xprt->xpt_mutex); return -ENOTCONN; out_close: @@ -1192,6 +1197,7 @@ out_close: (err < 0) ? err : sent, xdr->len); set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); + atomic_dec(&svsk->sk_sendqlen); mutex_unlock(&xprt->xpt_mutex); return -EAGAIN; } @@ -1261,7 +1267,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_datalen = 0; memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); - tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + tcp_sock_set_nodelay(sk); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); switch (sk->sk_state) { -- cgit From 987c7b1d094db339e99d121e39011bdf3d32c5b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Feb 2021 12:17:23 -0500 Subject: SUNRPC: Remove redundant socket flags from svc_tcp_sendmsg() Now that the caller controls the TCP_CORK socket option, it is redundant to set MSG_MORE and MSG_SENDPAGE_NOTLAST in the calls to kernel_sendpage(). Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 231f510a4830..8c19732e425d 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1088,12 +1088,11 @@ static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg, .iov_base = &marker, .iov_len = sizeof(marker), }; - int flags, ret; + int ret; *sentp = 0; xdr_alloc_bvec(xdr, GFP_KERNEL); - msg->msg_flags = MSG_MORE; ret = kernel_sendmsg(sock, msg, &rm, 1, rm.iov_len); if (ret < 0) return ret; @@ -1101,8 +1100,7 @@ static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg, if (ret != rm.iov_len) return -EAGAIN; - flags = head->iov_len < xdr->len ? MSG_MORE | MSG_SENDPAGE_NOTLAST : 0; - ret = svc_tcp_send_kvec(sock, head, flags); + ret = svc_tcp_send_kvec(sock, head, 0); if (ret < 0) return ret; *sentp += ret; @@ -1116,15 +1114,11 @@ static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg, bvec = xdr->bvec + (xdr->page_base >> PAGE_SHIFT); offset = offset_in_page(xdr->page_base); remaining = xdr->page_len; - flags = MSG_MORE | MSG_SENDPAGE_NOTLAST; while (remaining > 0) { - if (remaining <= PAGE_SIZE && tail->iov_len == 0) - flags = 0; - len = min(remaining, bvec->bv_len - offset); ret = kernel_sendpage(sock, bvec->bv_page, bvec->bv_offset + offset, - len, flags); + len, 0); if (ret < 0) return ret; *sentp += ret; -- cgit From 4d12b727538609d7936fc509c032e0a52683367f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 16 Feb 2021 12:37:40 -0500 Subject: SUNRPC: Further clean up svc_tcp_sendmsg() Clean up: The msghdr is no longer needed in the caller. Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 8c19732e425d..2e2f007dfc9f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1078,9 +1078,8 @@ static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec, * In addition, the logic assumes that * .bv_len is never larger * than PAGE_SIZE. */ -static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg, - struct xdr_buf *xdr, rpc_fraghdr marker, - unsigned int *sentp) +static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr, + rpc_fraghdr marker, unsigned int *sentp) { const struct kvec *head = xdr->head; const struct kvec *tail = xdr->tail; @@ -1088,12 +1087,15 @@ static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg, .iov_base = &marker, .iov_len = sizeof(marker), }; + struct msghdr msg = { + .msg_flags = 0, + }; int ret; *sentp = 0; xdr_alloc_bvec(xdr, GFP_KERNEL); - ret = kernel_sendmsg(sock, msg, &rm, 1, rm.iov_len); + ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len); if (ret < 0) return ret; *sentp += ret; @@ -1157,9 +1159,6 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) struct xdr_buf *xdr = &rqstp->rq_res; rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len); - struct msghdr msg = { - .msg_flags = 0, - }; unsigned int sent; int err; @@ -1170,7 +1169,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) if (svc_xprt_is_dead(xprt)) goto out_notconn; tcp_sock_set_cork(svsk->sk_sk, true); - err = svc_tcp_sendmsg(svsk->sk_sock, &msg, xdr, marker, &sent); + err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent); xdr_free_bvec(xdr); trace_svcsock_tcp_send(xprt, err < 0 ? err : sent); if (err < 0 || sent != (xdr->len + sizeof(marker))) -- cgit