From ca4faf543a33373bed3650812d5f0cd0bd295b1a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 2 May 2020 10:37:44 -0400 Subject: SUNRPC: Move xpt_mutex into socket xpo_sendto methods It appears that the RPC/RDMA transport does not need serialization of calls to its xpo_sendto method. Move the mutex into the socket methods that still need that serialization. Tail latencies are unambiguously better with this patch applied. fio randrw 8KB 70/30 on NFSv3, smaller numbers are better: clat percentiles (usec): With xpt_mutex: r | 99.99th=[ 8848] w | 99.99th=[ 9634] Without xpt_mutex: r | 99.99th=[ 8586] w | 99.99th=[ 8979] Serializing the construction of RPC/RDMA transport headers is not really necessary at this point, because the Linux NFS server implementation never changes its credit grant on a connection. If that should change, then svc_rdma_sendto will need to serialize access to the transport's credit grant fields. Reported-by: kbuild test robot [ cel: fix uninitialized variable warning ] Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 023514e392b3..3e7b6445e317 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -506,6 +506,9 @@ out_free: * svc_udp_sendto - Send out a reply on a UDP socket * @rqstp: completed svc_rqst * + * xpt_mutex ensures @rqstp's whole message is written to the socket + * without interruption. + * * Returns the number of bytes sent, or a negative errno. */ static int svc_udp_sendto(struct svc_rqst *rqstp) @@ -531,6 +534,11 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) svc_set_cmsg_data(rqstp, cmh); + mutex_lock(&xprt->xpt_mutex); + + if (svc_xprt_is_dead(xprt)) + goto out_notconn; + err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); xdr_free_bvec(xdr); if (err == -ECONNREFUSED) { @@ -538,9 +546,15 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); xdr_free_bvec(xdr); } + + mutex_unlock(&xprt->xpt_mutex); if (err < 0) return err; return sent; + +out_notconn: + mutex_unlock(&xprt->xpt_mutex); + return -ENOTCONN; } static int svc_udp_has_wspace(struct svc_xprt *xprt) @@ -1063,6 +1077,9 @@ err_noclose: * svc_tcp_sendto - Send out a reply on a TCP socket * @rqstp: completed svc_rqst * + * xpt_mutex ensures @rqstp's whole message is written to the socket + * without interruption. + * * Returns the number of bytes sent, or a negative errno. */ static int svc_tcp_sendto(struct svc_rqst *rqstp) @@ -1080,12 +1097,19 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) svc_release_skb(rqstp); + mutex_lock(&xprt->xpt_mutex); + if (svc_xprt_is_dead(xprt)) + goto out_notconn; err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); xdr_free_bvec(xdr); if (err < 0 || sent != (xdr->len + sizeof(marker))) goto out_close; + mutex_unlock(&xprt->xpt_mutex); return sent; +out_notconn: + mutex_unlock(&xprt->xpt_mutex); + return -ENOTCONN; out_close: pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", xprt->xpt_server->sv_name, @@ -1093,6 +1117,7 @@ out_close: (err < 0) ? err : sent, xdr->len); set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); + mutex_unlock(&xprt->xpt_mutex); return -EAGAIN; } -- cgit From 11bbb0f76e995cb617f582e7a4ec6cb8f6daf910 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 17 Mar 2020 17:41:43 -0400 Subject: SUNRPC: Trace a few more generic svc_xprt events In lieu of dprintks or tracepoints in each individual transport implementation, introduce tracepoints in the generic part of the RPC layer. These typically fire for connection lifetime events, so shouldn't contribute a lot of noise. Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 3e7b6445e317..cf4bd198c19d 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -727,7 +727,6 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) int err, slen; RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); if (!sock) return NULL; @@ -1363,11 +1362,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, int newlen; int family; int val; - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - - dprintk("svc: svc_create_socket(%s, %d, %s)\n", - serv->sv_program->pg_name, protocol, - __svc_print_addr(sin, buf, sizeof(buf))); if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { printk(KERN_WARNING "svc: only UDP and TCP " @@ -1427,7 +1421,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen); return (struct svc_xprt *)svsk; bummer: - dprintk("svc: svc_create_socket error = %d\n", -error); sock_release(sock); return ERR_PTR(error); } @@ -1441,8 +1434,6 @@ static void svc_sock_detach(struct svc_xprt *xprt) struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); struct sock *sk = svsk->sk_sk; - dprintk("svc: svc_sock_detach(%p)\n", svsk); - /* put back the old socket callbacks */ lock_sock(sk); sk->sk_state_change = svsk->sk_ostate; @@ -1459,8 +1450,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - dprintk("svc: svc_tcp_sock_detach(%p)\n", svsk); - svc_sock_detach(xprt); if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) { @@ -1475,7 +1464,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) static void svc_sock_free(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - dprintk("svc: svc_sock_free(%p)\n", svsk); if (svsk->sk_sock->file) sockfd_put(svsk->sk_sock); -- cgit From d998882b4b1b08e72f4414d8eb6647af72bffe2f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 1 May 2020 10:40:47 -0400 Subject: SUNRPC: Remove "#include " Clean up: Commit 850cbaddb52d ("udp: use it's own memory accounting schema") removed the last skb-related tracepoint from svcsock.c, so it is no longer necessary to include trace/events/skb.h. Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index cf4bd198c19d..1c4d0aacc531 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include -- cgit From 998024dee197944a7018a0bdc85b83b569ddec22 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 17 Mar 2020 15:06:31 -0400 Subject: SUNRPC: Add more svcsock tracepoints In addition to tracing recently-updated socket sendto events, this commit adds a trace event class that can be used for additional svcsock-related tracepoints in subsequent patches. Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 1c4d0aacc531..758b835ad4ce 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -54,6 +54,8 @@ #include #include +#include + #include "socklib.h" #include "sunrpc.h" @@ -281,13 +283,10 @@ static void svc_data_ready(struct sock *sk) struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; if (svsk) { - dprintk("svc: socket %p(inet %p), busy=%d\n", - svsk, sk, - test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); - /* Refer to svc_setup_socket() for details. */ rmb(); svsk->sk_odata(sk); + trace_svcsock_data_ready(&svsk->sk_xprt, 0); if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) svc_xprt_enqueue(&svsk->sk_xprt); } @@ -301,11 +300,9 @@ static void svc_write_space(struct sock *sk) struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); if (svsk) { - dprintk("svc: socket %p(inet %p), write_space busy=%d\n", - svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); - /* Refer to svc_setup_socket() for details. */ rmb(); + trace_svcsock_write_space(&svsk->sk_xprt, 0); svsk->sk_owspace(sk); svc_xprt_enqueue(&svsk->sk_xprt); } @@ -545,6 +542,7 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); xdr_free_bvec(xdr); } + trace_svcsock_udp_send(xprt, err); mutex_unlock(&xprt->xpt_mutex); if (err < 0) @@ -616,7 +614,7 @@ static struct svc_xprt_class svc_udp_class = { static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) { - int err, level, optname, one = 1; + int level, optname, one = 1; svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class, &svsk->sk_xprt, serv); @@ -647,9 +645,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) default: BUG(); } - err = kernel_setsockopt(svsk->sk_sock, level, optname, - (char *)&one, sizeof(one)); - dprintk("svc: kernel_setsockopt returned %d\n", err); + kernel_setsockopt(svsk->sk_sock, level, optname, (char *)&one, + sizeof(one)); } /* @@ -1100,6 +1097,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) goto out_notconn; err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); xdr_free_bvec(xdr); + trace_svcsock_tcp_send(xprt, err < 0 ? err : sent); if (err < 0 || sent != (xdr->len + sizeof(marker))) goto out_close; mutex_unlock(&xprt->xpt_mutex); @@ -1170,13 +1168,11 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags); if (sk->sk_state == TCP_LISTEN) { - dprintk("setting up TCP socket for listening\n"); strcpy(svsk->sk_xprt.xpt_remotebuf, "listener"); set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); sk->sk_data_ready = svc_tcp_listen_data_ready; set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); } else { - dprintk("setting up TCP socket for reading\n"); sk->sk_state_change = svc_tcp_state_change; sk->sk_data_ready = svc_data_ready; sk->sk_write_space = svc_write_space; @@ -1226,7 +1222,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); int err = 0; - dprintk("svc: svc_setup_socket %p\n", sock); svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); if (!svsk) return ERR_PTR(-ENOMEM); @@ -1263,12 +1258,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, else svc_tcp_init(svsk, serv); - dprintk("svc: svc_setup_socket created %p (inet %p), " - "listen %d close %d\n", - svsk, svsk->sk_sk, - test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags), - test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); - + trace_svcsock_new_socket(sock); return svsk; } -- cgit From a0469f46faab786e8ec9f8c8526a185357b38772 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2020 11:20:50 -0400 Subject: SUNRPC: Replace dprintk call sites in TCP state change callouts Report TCP socket state changes and accept failures via tracepoints, replacing dprintk() call sites. No tracepoint is added in svc_tcp_listen_data_ready. There's no information available there that isn't also reported by the svcsock_new_socket and the accept failure tracepoints. Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 758b835ad4ce..4ac1180c6306 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -657,9 +657,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; - dprintk("svc: socket %p TCP (listen) state change %d\n", - sk, sk->sk_state); - if (svsk) { /* Refer to svc_setup_socket() for details. */ rmb(); @@ -680,8 +677,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk) if (svsk) { set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); - } else - printk("svc: socket %p: no user data\n", sk); + } } } @@ -692,15 +688,11 @@ static void svc_tcp_state_change(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; - dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", - sk, sk->sk_state, sk->sk_user_data); - - if (!svsk) - printk("svc: socket %p: no user data\n", sk); - else { + if (svsk) { /* Refer to svc_setup_socket() for details. */ rmb(); svsk->sk_ostate(sk); + trace_svcsock_tcp_state(&svsk->sk_xprt, svsk->sk_sock); if (sk->sk_state != TCP_ESTABLISHED) { set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); @@ -721,7 +713,6 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) struct socket *newsock; struct svc_sock *newsvsk; int err, slen; - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); if (!sock) return NULL; @@ -735,30 +726,18 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) else if (err != -EAGAIN) net_warn_ratelimited("%s: accept failed (err %d)!\n", serv->sv_name, -err); + trace_svcsock_accept_err(xprt, serv->sv_name, err); return NULL; } set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); err = kernel_getpeername(newsock, sin); if (err < 0) { - net_warn_ratelimited("%s: peername failed (err %d)!\n", - serv->sv_name, -err); + trace_svcsock_getpeername_err(xprt, serv->sv_name, err); goto failed; /* aborted connection or whatever */ } slen = err; - /* Ideally, we would want to reject connections from unauthorized - * hosts here, but when we get encryption, the IP of the host won't - * tell us anything. For now just warn about unpriv connections. - */ - if (!svc_port_is_privileged(sin)) { - dprintk("%s: connect from unprivileged port: %s\n", - serv->sv_name, - __svc_print_addr(sin, buf, sizeof(buf))); - } - dprintk("%s: connect from %s\n", serv->sv_name, - __svc_print_addr(sin, buf, sizeof(buf))); - /* Reset the inherited callbacks before calling svc_setup_socket */ newsock->sk->sk_state_change = svsk->sk_ostate; newsock->sk->sk_data_ready = svsk->sk_odata; @@ -776,10 +755,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen); err = kernel_getsockname(newsock, sin); slen = err; - if (unlikely(err < 0)) { - dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); + if (unlikely(err < 0)) slen = offsetof(struct sockaddr, sa_data); - } svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); if (sock_is_loopback(newsock->sk)) -- cgit From 02648908d19a99532b0839959e38ae53d95d2798 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 17 Mar 2020 14:12:15 -0400 Subject: SUNRPC: Rename svc_sock::sk_reclen Clean up. I find the name of the svc_sock::sk_reclen field confusing, so I've changed it to better reflect its function. This field is not read directly to get the record length. Rather, it is a buffer containing a record marker that needs to be decoded. Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 4ac1180c6306..d63b21f3f207 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -841,7 +841,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) struct kvec iov; want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; - iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; + iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; len = svc_recvfrom(rqstp, &iov, 1, want, 0); if (len < 0) @@ -938,7 +938,7 @@ static void svc_tcp_fragment_received(struct svc_sock *svsk) svc_sock_final_rec(svsk) ? "final" : "nonfinal", svc_sock_reclen(svsk)); svsk->sk_tcplen = 0; - svsk->sk_reclen = 0; + svsk->sk_marker = xdr_zero; } /* @@ -1154,7 +1154,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) sk->sk_data_ready = svc_data_ready; sk->sk_write_space = svc_write_space; - svsk->sk_reclen = 0; + svsk->sk_marker = xdr_zero; svsk->sk_tcplen = 0; svsk->sk_datalen = 0; memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); -- cgit From a5cda73e49aaaac58b25750f4b591c0dc9726a44 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Mar 2020 14:53:04 -0400 Subject: SUNRPC: Restructure svc_tcp_recv_record() Refactor: svc_recvfrom() is going to be converted to read into bio_vecs in a moment. Unhook the only other caller, svc_tcp_recv_record(), which just wants to read the 4-byte stream record marker into a kvec. While we're in the area, streamline this helper by straight-lining the hot path, replace dprintk call sites with tracepoints, and reduce the number of atomic bit operations in this path. Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 51 +++++++++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 26 deletions(-) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d63b21f3f207..9c1eb13aa9b8 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -828,47 +828,45 @@ out: } /* - * Receive fragment record header. - * If we haven't gotten the record length yet, get the next four bytes. + * Receive fragment record header into sk_marker. */ -static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) +static ssize_t svc_tcp_read_marker(struct svc_sock *svsk, + struct svc_rqst *rqstp) { - struct svc_serv *serv = svsk->sk_xprt.xpt_server; - unsigned int want; - int len; + ssize_t want, len; + /* If we haven't gotten the record length yet, + * get the next four bytes. + */ if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { + struct msghdr msg = { NULL }; struct kvec iov; want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; - len = svc_recvfrom(rqstp, &iov, 1, want, 0); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, want); + len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); if (len < 0) - goto error; + return len; svsk->sk_tcplen += len; - if (len < want) { - dprintk("svc: short recvfrom while reading record " - "length (%d of %d)\n", len, want); - return -EAGAIN; + /* call again to read the remaining bytes */ + goto err_short; } - - dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk)); + trace_svcsock_marker(&svsk->sk_xprt, svsk->sk_marker); if (svc_sock_reclen(svsk) + svsk->sk_datalen > - serv->sv_max_mesg) { - net_notice_ratelimited("RPC: fragment too large: %d\n", - svc_sock_reclen(svsk)); - goto err_delete; - } + svsk->sk_xprt.xpt_server->sv_max_mesg) + goto err_too_large; } - return svc_sock_reclen(svsk); -error: - dprintk("RPC: TCP recv_record got %d\n", len); - return len; -err_delete: + +err_too_large: + net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n", + __func__, svsk->sk_xprt.xpt_server->sv_name, + svc_sock_reclen(svsk)); set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +err_short: return -EAGAIN; } @@ -961,12 +959,13 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); - len = svc_tcp_recv_record(svsk, rqstp); + clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + len = svc_tcp_read_marker(svsk, rqstp); if (len < 0) goto error; base = svc_tcp_restore_pages(svsk, rqstp); - want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); + want = len - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); vec = rqstp->rq_vec; -- cgit From 7dae1dd726aac7871d9cc56ed9d13fa09c0212f9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 May 2020 13:41:02 -0400 Subject: SUNRPC: Replace dprintk() call sites in TCP receive path Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 9c1eb13aa9b8..8cf06b676831 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -119,9 +119,8 @@ static void svc_release_skb(struct svc_rqst *rqstp) if (skb) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); - rqstp->rq_xprt_ctxt = NULL; - dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); + rqstp->rq_xprt_ctxt = NULL; skb_free_datagram_locked(svsk->sk_sk, skb); } } @@ -132,8 +131,6 @@ static void svc_release_udp_skb(struct svc_rqst *rqstp) if (skb) { rqstp->rq_xprt_ctxt = NULL; - - dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); consume_skb(skb); } } @@ -245,8 +242,6 @@ static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, if (len == buflen) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - dprintk("svc: socket %p recvfrom(%p, %zu) = %zd\n", - svsk, iov[0].iov_base, iov[0].iov_len, len); return len; } @@ -932,9 +927,6 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) static void svc_tcp_fragment_received(struct svc_sock *svsk) { /* If we have more data, signal svc_xprt_enqueue() to try again */ - dprintk("svc: TCP %s record (%d bytes)\n", - svc_sock_final_rec(svsk) ? "final" : "nonfinal", - svc_sock_reclen(svsk)); svsk->sk_tcplen = 0; svsk->sk_marker = xdr_zero; } @@ -954,11 +946,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) __be32 calldir; int pnum; - dprintk("svc: tcp_recv %p data %d conn %d close %d\n", - svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), - test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), - test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); - clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); len = svc_tcp_read_marker(svsk, rqstp); if (len < 0) @@ -977,6 +964,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) /* Now receive data */ len = svc_recvfrom(rqstp, vec, pnum, base + want, base); if (len >= 0) { + trace_svcsock_tcp_recv(&svsk->sk_xprt, len); svsk->sk_tcplen += len; svsk->sk_datalen += len; } -- cgit From 6be8c5949149ff45c86dd9e49dfab920078bfcd5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 May 2020 12:29:13 -0400 Subject: SUNRPC: Refactor recvfrom path dealing with incomplete TCP receives Clean up: move exception processing out of the main path. Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 8cf06b676831..087e21b0f1bb 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -968,23 +968,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) svsk->sk_tcplen += len; svsk->sk_datalen += len; } - if (len != want || !svc_sock_final_rec(svsk)) { - svc_tcp_save_pages(svsk, rqstp); - if (len < 0 && len != -EAGAIN) - goto err_delete; - if (len == want) - svc_tcp_fragment_received(svsk); - else - dprintk("svc: incomplete TCP record (%d of %d)\n", - (int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)), - svc_sock_reclen(svsk)); - goto err_noclose; - } - - if (svsk->sk_datalen < 8) { - svsk->sk_datalen = 0; - goto err_delete; /* client is nuts. */ - } + if (len != want || !svc_sock_final_rec(svsk)) + goto err_incomplete; + if (svsk->sk_datalen < 8) + goto err_nuts; rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; @@ -1019,14 +1006,26 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) return rqstp->rq_arg.len; +err_incomplete: + svc_tcp_save_pages(svsk, rqstp); + if (len < 0 && len != -EAGAIN) + goto err_delete; + if (len == want) + svc_tcp_fragment_received(svsk); + else + trace_svcsock_tcp_recv_short(&svsk->sk_xprt, + svc_sock_reclen(svsk), + svsk->sk_tcplen - sizeof(rpc_fraghdr)); + goto err_noclose; error: if (len != -EAGAIN) goto err_delete; - dprintk("RPC: TCP recvfrom got EAGAIN\n"); + trace_svcsock_tcp_recv_eagain(&svsk->sk_xprt, 0); return 0; +err_nuts: + svsk->sk_datalen = 0; err_delete: - printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", - svsk->sk_xprt.xpt_server->sv_name, -len); + trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len); set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); err_noclose: return 0; /* record not complete */ -- cgit From cca557a5a60faaf307bbb76035dd90ec47cf0e0c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 May 2020 12:53:05 -0400 Subject: SUNRPC: Clean up svc_release_skb() functions Rename these functions using the convention used for other xpo method entry points. Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 087e21b0f1bb..5b2981a0711c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -109,10 +109,12 @@ static void svc_reclassify_socket(struct socket *sock) } #endif -/* - * Release an skbuff after use +/** + * svc_tcp_release_rqst - Release transport-related resources + * @rqstp: request structure with resources to be released + * */ -static void svc_release_skb(struct svc_rqst *rqstp) +static void svc_tcp_release_rqst(struct svc_rqst *rqstp) { struct sk_buff *skb = rqstp->rq_xprt_ctxt; @@ -125,7 +127,12 @@ static void svc_release_skb(struct svc_rqst *rqstp) } } -static void svc_release_udp_skb(struct svc_rqst *rqstp) +/** + * svc_udp_release_rqst - Release transport-related resources + * @rqstp: request structure with resources to be released + * + */ +static void svc_udp_release_rqst(struct svc_rqst *rqstp) { struct sk_buff *skb = rqstp->rq_xprt_ctxt; @@ -521,7 +528,7 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) unsigned int uninitialized_var(sent); int err; - svc_release_udp_skb(rqstp); + svc_udp_release_rqst(rqstp); svc_set_cmsg_data(rqstp, cmh); @@ -590,7 +597,7 @@ static const struct svc_xprt_ops svc_udp_ops = { .xpo_recvfrom = svc_udp_recvfrom, .xpo_sendto = svc_udp_sendto, .xpo_read_payload = svc_sock_read_payload, - .xpo_release_rqst = svc_release_udp_skb, + .xpo_release_rqst = svc_udp_release_rqst, .xpo_detach = svc_sock_detach, .xpo_free = svc_sock_free, .xpo_has_wspace = svc_udp_has_wspace, @@ -1053,7 +1060,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) unsigned int uninitialized_var(sent); int err; - svc_release_skb(rqstp); + svc_tcp_release_rqst(rqstp); mutex_lock(&xprt->xpt_mutex); if (svc_xprt_is_dead(xprt)) @@ -1093,7 +1100,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { .xpo_recvfrom = svc_tcp_recvfrom, .xpo_sendto = svc_tcp_sendto, .xpo_read_payload = svc_sock_read_payload, - .xpo_release_rqst = svc_release_skb, + .xpo_release_rqst = svc_tcp_release_rqst, .xpo_detach = svc_tcp_sock_detach, .xpo_free = svc_sock_free, .xpo_has_wspace = svc_tcp_has_wspace, -- cgit From ca07eda33e01eafa7a26ec06974f7eacee6a89c8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 May 2020 17:30:12 -0400 Subject: SUNRPC: Refactor svc_recvfrom() This function is not currently "generic" so remove the documenting comment and rename it appropriately. Its internals are converted to use bio_vecs for reading from the transport socket. In existing typical sunrpc uses of bio_vecs, the bio_vec array is allocated dynamically. Here, instead, an array of bio_vecs is added to svc_rqst. The lifetime of this array can be greater than one call to xpo_recvfrom(): - Multiple calls to xpo_recvfrom() might be needed to read an RPC message completely. - At some later point, rq_arg.bvecs will point to this array and it will carry the received message into svc_process(). I also expect that a future optimization will remove either the rq_vec or rq_pages array in favor of rq_bvec, thus conserving the size of struct svc_rqst. Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 109 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 41 deletions(-) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5b2981a0711c..3e25511b800e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -223,26 +223,62 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) return len; } +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek) +{ + struct bvec_iter bi = { + .bi_size = size, + }; + struct bio_vec bv; + + bvec_iter_advance(bvec, &bi, seek & PAGE_MASK); + for_each_bvec(bv, bvec, bi, bi) + flush_dcache_page(bv.bv_page); +} +#else +static inline void svc_flush_bvec(const struct bio_vec *bvec, size_t size, + size_t seek) +{ +} +#endif + /* - * Generic recvfrom routine. + * Read from @rqstp's transport socket. The incoming message fills whole + * pages in @rqstp's rq_pages array until the last page of the message + * has been received into a partial page. */ -static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, - unsigned int nr, size_t buflen, unsigned int base) +static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, + size_t seek) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); + struct bio_vec *bvec = rqstp->rq_bvec; struct msghdr msg = { NULL }; + unsigned int i; ssize_t len; + size_t t; rqstp->rq_xprt_hlen = 0; clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen); - if (base != 0) { - iov_iter_advance(&msg.msg_iter, base); - buflen -= base; + + for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) { + bvec[i].bv_page = rqstp->rq_pages[i]; + bvec[i].bv_len = PAGE_SIZE; + bvec[i].bv_offset = 0; + } + rqstp->rq_respages = &rqstp->rq_pages[i]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + + iov_iter_bvec(&msg.msg_iter, READ, bvec, i, buflen); + if (seek) { + iov_iter_advance(&msg.msg_iter, seek); + buflen -= seek; } len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); + if (len > 0) + svc_flush_bvec(bvec, len, seek); + /* If we read a full record, then assume there may be more * data to read (stream based sockets only!) */ @@ -775,13 +811,14 @@ failed: return NULL; } -static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +static size_t svc_tcp_restore_pages(struct svc_sock *svsk, + struct svc_rqst *rqstp) { - unsigned int i, len, npages; + size_t len = svsk->sk_datalen; + unsigned int i, npages; - if (svsk->sk_datalen == 0) + if (!len) return 0; - len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { if (rqstp->rq_pages[i] != NULL) @@ -917,20 +954,6 @@ unlock_eagain: return -EAGAIN; } -static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) -{ - int i = 0; - int t = 0; - - while (t < len) { - vec[i].iov_base = page_address(pages[i]); - vec[i].iov_len = PAGE_SIZE; - i++; - t += PAGE_SIZE; - } - return i; -} - static void svc_tcp_fragment_received(struct svc_sock *svsk) { /* If we have more data, signal svc_xprt_enqueue() to try again */ @@ -938,20 +961,33 @@ static void svc_tcp_fragment_received(struct svc_sock *svsk) svsk->sk_marker = xdr_zero; } -/* - * Receive data from a TCP socket. +/** + * svc_tcp_recvfrom - Receive data from a TCP socket + * @rqstp: request structure into which to receive an RPC Call + * + * Called in a loop when XPT_DATA has been set. + * + * Read the 4-byte stream record marker, then use the record length + * in that marker to set up exactly the resources needed to receive + * the next RPC message into @rqstp. + * + * Returns: + * On success, the number of bytes in a received RPC Call, or + * %0 if a complete RPC Call message was not ready to return + * + * The zero return case handles partial receives and callback Replies. + * The state of a partial receive is preserved in the svc_sock for + * the next call to svc_tcp_recvfrom. */ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); struct svc_serv *serv = svsk->sk_xprt.xpt_server; - int len; - struct kvec *vec; - unsigned int want, base; + size_t want, base; + ssize_t len; __be32 *p; __be32 calldir; - int pnum; clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); len = svc_tcp_read_marker(svsk, rqstp); @@ -960,16 +996,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) base = svc_tcp_restore_pages(svsk, rqstp); want = len - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); - - vec = rqstp->rq_vec; - - pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], base + want); - - rqstp->rq_respages = &rqstp->rq_pages[pnum]; - rqstp->rq_next_page = rqstp->rq_respages + 1; - - /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, base + want, base); + len = svc_tcp_read_msg(rqstp, base + want, base); if (len >= 0) { trace_svcsock_tcp_recv(&svsk->sk_xprt, len); svsk->sk_tcplen += len; -- cgit From fff1ebb269b6c18b21bc0ddab7dd8b0c5e68e0a1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 May 2020 17:30:24 -0400 Subject: SUNRPC: Restructure svc_udp_recvfrom() Clean up. At this point, we are not ready yet to support bio_vecs in the UDP transport implementation. However, we can clean up svc_udp_recvfrom() to match the tracing and straight-lining recently changes made in svc_tcp_recvfrom(). Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 60 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 25 deletions(-) (limited to 'net/sunrpc/svcsock.c') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 3e25511b800e..97d2b6f8c791 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -425,8 +425,15 @@ static int svc_udp_get_dest_address(struct svc_rqst *rqstp, return 0; } -/* - * Receive a datagram from a UDP socket. +/** + * svc_udp_recvfrom - Receive a datagram from a UDP socket. + * @rqstp: request structure into which to receive an RPC Call + * + * Called in a loop when XPT_DATA has been set. + * + * Returns: + * On success, the number of bytes in a received RPC Call, or + * %0 if a complete RPC Call message was not ready to return */ static int svc_udp_recvfrom(struct svc_rqst *rqstp) { @@ -460,20 +467,14 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3); clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - skb = NULL; err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, 0, 0, MSG_PEEK | MSG_DONTWAIT); - if (err >= 0) - skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); - - if (skb == NULL) { - if (err != -EAGAIN) { - /* possibly an icmp error */ - dprintk("svc: recvfrom returned error %d\n", -err); - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - } - return 0; - } + if (err < 0) + goto out_recv_err; + skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); + if (!skb) + goto out_recv_err; + len = svc_addr_len(svc_addr(rqstp)); rqstp->rq_addrlen = len; if (skb->tstamp == 0) { @@ -484,26 +485,21 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) sock_write_timestamp(svsk->sk_sk, skb->tstamp); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ - len = skb->len; + len = skb->len; rqstp->rq_arg.len = len; + trace_svcsock_udp_recv(&svsk->sk_xprt, len); rqstp->rq_prot = IPPROTO_UDP; - if (!svc_udp_get_dest_address(rqstp, cmh)) { - net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n", - cmh->cmsg_level, cmh->cmsg_type); - goto out_free; - } + if (!svc_udp_get_dest_address(rqstp, cmh)) + goto out_cmsg_err; rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp)); if (skb_is_nonlinear(skb)) { /* we have to copy */ local_bh_disable(); - if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) { - local_bh_enable(); - /* checksum error */ - goto out_free; - } + if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) + goto out_bh_enable; local_bh_enable(); consume_skb(skb); } else { @@ -531,6 +527,20 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) serv->sv_stats->netudpcnt++; return len; + +out_recv_err: + if (err != -EAGAIN) { + /* possibly an icmp error */ + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + } + trace_svcsock_udp_recv_err(&svsk->sk_xprt, err); + return 0; +out_cmsg_err: + net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n", + cmh->cmsg_level, cmh->cmsg_type); + goto out_free; +out_bh_enable: + local_bh_enable(); out_free: kfree_skb(skb); return 0; -- cgit