summaryrefslogtreecommitdiff
path: root/net/sunrpc
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc')
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c49
-rw-r--r--net/sunrpc/auth_unix.c2
-rw-r--r--net/sunrpc/backchannel_rqst.c1
-rw-r--r--net/sunrpc/debugfs.c2
-rw-r--r--net/sunrpc/sched.c7
-rw-r--r--net/sunrpc/xprt.c6
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c1
-rw-r--r--net/sunrpc/xprtrdma/transport.c1
-rw-r--r--net/sunrpc/xprtrdma/verbs.c3
-rw-r--r--net/sunrpc/xprtsock.c281
10 files changed, 203 insertions, 150 deletions
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index fb6656295204..507105127095 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -44,7 +44,7 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
unsigned char *cksum, unsigned char *buf)
{
struct crypto_sync_skcipher *cipher;
- unsigned char plain[8];
+ unsigned char *plain;
s32 code;
dprintk("RPC: %s:\n", __func__);
@@ -52,6 +52,10 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
if (IS_ERR(cipher))
return PTR_ERR(cipher);
+ plain = kmalloc(8, GFP_NOFS);
+ if (!plain)
+ return -ENOMEM;
+
plain[0] = (unsigned char) ((seqnum >> 24) & 0xff);
plain[1] = (unsigned char) ((seqnum >> 16) & 0xff);
plain[2] = (unsigned char) ((seqnum >> 8) & 0xff);
@@ -67,6 +71,7 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
code = krb5_encrypt(cipher, cksum, plain, buf, 8);
out:
+ kfree(plain);
crypto_free_sync_skcipher(cipher);
return code;
}
@@ -77,12 +82,17 @@ krb5_make_seq_num(struct krb5_ctx *kctx,
u32 seqnum,
unsigned char *cksum, unsigned char *buf)
{
- unsigned char plain[8];
+ unsigned char *plain;
+ s32 code;
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
return krb5_make_rc4_seq_num(kctx, direction, seqnum,
cksum, buf);
+ plain = kmalloc(8, GFP_NOFS);
+ if (!plain)
+ return -ENOMEM;
+
plain[0] = (unsigned char) (seqnum & 0xff);
plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
@@ -93,7 +103,9 @@ krb5_make_seq_num(struct krb5_ctx *kctx,
plain[6] = direction;
plain[7] = direction;
- return krb5_encrypt(key, cksum, plain, buf, 8);
+ code = krb5_encrypt(key, cksum, plain, buf, 8);
+ kfree(plain);
+ return code;
}
static s32
@@ -101,7 +113,7 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
unsigned char *buf, int *direction, s32 *seqnum)
{
struct crypto_sync_skcipher *cipher;
- unsigned char plain[8];
+ unsigned char *plain;
s32 code;
dprintk("RPC: %s:\n", __func__);
@@ -113,20 +125,28 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
if (code)
goto out;
+ plain = kmalloc(8, GFP_NOFS);
+ if (!plain) {
+ code = -ENOMEM;
+ goto out;
+ }
+
code = krb5_decrypt(cipher, cksum, buf, plain, 8);
if (code)
- goto out;
+ goto out_plain;
if ((plain[4] != plain[5]) || (plain[4] != plain[6])
|| (plain[4] != plain[7])) {
code = (s32)KG_BAD_SEQ;
- goto out;
+ goto out_plain;
}
*direction = plain[4];
*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
(plain[2] << 8) | (plain[3]));
+out_plain:
+ kfree(plain);
out:
crypto_free_sync_skcipher(cipher);
return code;
@@ -139,7 +159,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
int *direction, u32 *seqnum)
{
s32 code;
- unsigned char plain[8];
+ unsigned char *plain;
struct crypto_sync_skcipher *key = kctx->seq;
dprintk("RPC: krb5_get_seq_num:\n");
@@ -147,18 +167,25 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
return krb5_get_rc4_seq_num(kctx, cksum, buf,
direction, seqnum);
+ plain = kmalloc(8, GFP_NOFS);
+ if (!plain)
+ return -ENOMEM;
if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
- return code;
+ goto out;
if ((plain[4] != plain[5]) || (plain[4] != plain[6]) ||
- (plain[4] != plain[7]))
- return (s32)KG_BAD_SEQ;
+ (plain[4] != plain[7])) {
+ code = (s32)KG_BAD_SEQ;
+ goto out;
+ }
*direction = plain[4];
*seqnum = ((plain[0]) |
(plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
- return 0;
+out:
+ kfree(plain);
+ return code;
}
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index c048eb6deaaf..d4018e5a24c5 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -80,7 +80,7 @@ unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
if (!uid_eq(cred->cr_cred->fsuid, acred->cred->fsuid) || !gid_eq(cred->cr_cred->fsgid, acred->cred->fsgid))
return 0;
- if (acred->cred && acred->cred->group_info != NULL)
+ if (acred->cred->group_info != NULL)
groups = acred->cred->group_info->ngroups;
if (groups > UNX_NGROUPS)
groups = UNX_NGROUPS;
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index ec451b8114b0..b9313c15ee3a 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -252,7 +252,6 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
rq_bc_pa_list);
req->rq_reply_bytes_recvd = 0;
- req->rq_bytes_sent = 0;
memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
sizeof(req->rq_private_buf));
req->rq_xid = xid;
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 45a033329cd4..19bb356230ed 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -146,7 +146,7 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
/* no "debugfs" dentry? Don't bother with the symlink. */
- if (!xprt->debugfs) {
+ if (IS_ERR_OR_NULL(xprt->debugfs)) {
rcu_read_unlock();
return;
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index adc3c40cc733..2168d4d9c09f 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -19,6 +19,7 @@
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/freezer.h>
+#include <linux/sched/mm.h>
#include <linux/sunrpc/clnt.h>
@@ -902,7 +903,10 @@ void rpc_execute(struct rpc_task *task)
static void rpc_async_schedule(struct work_struct *work)
{
+ unsigned int pflags = memalloc_nofs_save();
+
__rpc_execute(container_of(work, struct rpc_task, u.tk_work));
+ memalloc_nofs_restore(pflags);
}
/**
@@ -1067,7 +1071,10 @@ static void rpc_free_task(struct rpc_task *task)
static void rpc_async_release(struct work_struct *work)
{
+ unsigned int pflags = memalloc_nofs_save();
+
rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
+ memalloc_nofs_restore(pflags);
}
static void rpc_release_resources_task(struct rpc_task *task)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index bc7489f1fe55..1cf4e379be7b 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -49,6 +49,7 @@
#include <linux/sunrpc/metrics.h>
#include <linux/sunrpc/bc_xprt.h>
#include <linux/rcupdate.h>
+#include <linux/sched/mm.h>
#include <trace/events/sunrpc.h>
@@ -643,11 +644,13 @@ static void xprt_autoclose(struct work_struct *work)
{
struct rpc_xprt *xprt =
container_of(work, struct rpc_xprt, task_cleanup);
+ unsigned int pflags = memalloc_nofs_save();
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
xprt->ops->close(xprt);
xprt_release_write(xprt, NULL);
wake_up_bit(&xprt->state, XPRT_LOCKED);
+ memalloc_nofs_restore(pflags);
}
/**
@@ -1601,7 +1604,6 @@ xprt_request_init(struct rpc_task *task)
req->rq_buffer = NULL;
req->rq_xid = xprt_alloc_xid(xprt);
xprt_init_connect_cookie(req, xprt);
- req->rq_bytes_sent = 0;
req->rq_snd_buf.len = 0;
req->rq_snd_buf.buflen = 0;
req->rq_rcv_buf.len = 0;
@@ -1723,6 +1725,7 @@ void xprt_release(struct rpc_task *task)
xprt->ops->buf_free(task);
xprt_inject_disconnect(xprt);
xdr_free_bvec(&req->rq_rcv_buf);
+ xdr_free_bvec(&req->rq_snd_buf);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
task->tk_rqstp = NULL;
@@ -1751,7 +1754,6 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
*/
xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
xbufp->tail[0].iov_len;
- req->rq_bytes_sent = 0;
}
#endif
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 98c1e43eb7b1..d79b18c1f4cd 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -267,7 +267,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
/* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0;
- rqst->rq_bytes_sent = 0;
rqst->rq_xid = *p;
rqst->rq_private_buf.len = size;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index e7274dc10120..5d261353bd90 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -737,7 +737,6 @@ xprt_rdma_send_request(struct rpc_rqst *rqst)
goto drop_connection;
rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
- rqst->rq_bytes_sent = 0;
/* An RPC with no reply will throw off credit accounting,
* so drop the connection to reset the credit grant.
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index b4e997d53ec7..89a63391d4d4 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -527,7 +527,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
sendcq = ib_alloc_cq(ia->ri_device, NULL,
ep->rep_attr.cap.max_send_wr + 1,
- 1, IB_POLL_WORKQUEUE);
+ ia->ri_device->num_comp_vectors > 1 ? 1 : 0,
+ IB_POLL_WORKQUEUE);
if (IS_ERR(sendcq)) {
rc = PTR_ERR(sendcq);
goto out1;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ae09d850cd11..53de72d2dded 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -50,6 +50,7 @@
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/uio.h>
+#include <linux/sched/mm.h>
#include <trace/events/sunrpc.h>
@@ -404,8 +405,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
size_t want, seek_init = seek, offset = 0;
ssize_t ret;
- if (seek < buf->head[0].iov_len) {
- want = min_t(size_t, count, buf->head[0].iov_len);
+ want = min_t(size_t, count, buf->head[0].iov_len);
+ if (seek < want) {
ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek);
if (ret <= 0)
goto sock_err;
@@ -416,8 +417,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
goto out;
seek = 0;
} else {
- seek -= buf->head[0].iov_len;
- offset += buf->head[0].iov_len;
+ seek -= want;
+ offset += want;
}
want = xs_alloc_sparse_pages(buf,
@@ -442,8 +443,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
offset += want;
}
- if (seek < buf->tail[0].iov_len) {
- want = min_t(size_t, count - offset, buf->tail[0].iov_len);
+ want = min_t(size_t, count - offset, buf->tail[0].iov_len);
+ if (seek < want) {
ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
if (ret <= 0)
goto sock_err;
@@ -453,7 +454,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (ret != want)
goto out;
} else
- offset += buf->tail[0].iov_len;
+ offset = seek_init;
ret = -EMSGSIZE;
out:
*read = offset - seek_init;
@@ -481,6 +482,14 @@ xs_read_stream_request_done(struct sock_xprt *transport)
return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT);
}
+static void
+xs_read_stream_check_eor(struct sock_xprt *transport,
+ struct msghdr *msg)
+{
+ if (xs_read_stream_request_done(transport))
+ msg->msg_flags |= MSG_EOR;
+}
+
static ssize_t
xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
int flags, struct rpc_rqst *req)
@@ -492,17 +501,21 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
xs_read_header(transport, buf);
want = transport->recv.len - transport->recv.offset;
- ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
- transport->recv.copied + want, transport->recv.copied,
- &read);
- transport->recv.offset += read;
- transport->recv.copied += read;
- if (transport->recv.offset == transport->recv.len) {
- if (xs_read_stream_request_done(transport))
- msg->msg_flags |= MSG_EOR;
- return read;
+ if (want != 0) {
+ ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
+ transport->recv.copied + want,
+ transport->recv.copied,
+ &read);
+ transport->recv.offset += read;
+ transport->recv.copied += read;
}
+ if (transport->recv.offset == transport->recv.len)
+ xs_read_stream_check_eor(transport, msg);
+
+ if (want == 0)
+ return 0;
+
switch (ret) {
default:
break;
@@ -655,13 +668,34 @@ out_err:
return ret != 0 ? ret : -ESHUTDOWN;
}
+static __poll_t xs_poll_socket(struct sock_xprt *transport)
+{
+ return transport->sock->ops->poll(NULL, transport->sock, NULL);
+}
+
+static bool xs_poll_socket_readable(struct sock_xprt *transport)
+{
+ __poll_t events = xs_poll_socket(transport);
+
+ return (events & (EPOLLIN | EPOLLRDNORM)) && !(events & EPOLLRDHUP);
+}
+
+static void xs_poll_check_readable(struct sock_xprt *transport)
+{
+
+ clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+ if (!xs_poll_socket_readable(transport))
+ return;
+ if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
+ queue_work(xprtiod_workqueue, &transport->recv_worker);
+}
+
static void xs_stream_data_receive(struct sock_xprt *transport)
{
size_t read = 0;
ssize_t ret = 0;
mutex_lock(&transport->recv_mutex);
- clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
if (transport->sock == NULL)
goto out;
for (;;) {
@@ -671,6 +705,10 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
read += ret;
cond_resched();
}
+ if (ret == -ESHUTDOWN)
+ kernel_sock_shutdown(transport->sock, SHUT_RDWR);
+ else
+ xs_poll_check_readable(transport);
out:
mutex_unlock(&transport->recv_mutex);
trace_xs_stream_read_data(&transport->xprt, ret, read);
@@ -680,7 +718,10 @@ static void xs_stream_data_receive_workfn(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, recv_worker);
+ unsigned int pflags = memalloc_nofs_save();
+
xs_stream_data_receive(transport);
+ memalloc_nofs_restore(pflags);
}
static void
@@ -690,99 +731,65 @@ xs_stream_reset_connect(struct sock_xprt *transport)
transport->recv.len = 0;
transport->recv.copied = 0;
transport->xmit.offset = 0;
+}
+
+static void
+xs_stream_start_connect(struct sock_xprt *transport)
+{
transport->xprt.stat.connect_count++;
transport->xprt.stat.connect_start = jiffies;
}
#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
+static int xs_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek)
+{
+ if (seek)
+ iov_iter_advance(&msg->msg_iter, seek);
+ return sock_sendmsg(sock, msg);
+}
+
+static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t seek)
+{
+ iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len);
+ return xs_sendmsg(sock, msg, seek);
+}
+
+static int xs_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base)
+{
+ int err;
+
+ err = xdr_alloc_bvec(xdr, GFP_KERNEL);
+ if (err < 0)
+ return err;
+
+ iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec,
+ xdr_buf_pagecount(xdr),
+ xdr->page_len + xdr->page_base);
+ return xs_sendmsg(sock, msg, base + xdr->page_base);
+}
+
+#define xs_record_marker_len() sizeof(rpc_fraghdr)
+
/* Common case:
* - stream transport
* - sending from byte 0 of the message
* - the message is wholly contained in @xdr's head iovec
*/
-static int xs_send_rm_and_kvec(struct socket *sock, struct xdr_buf *xdr,
- unsigned int remainder)
+static int xs_send_rm_and_kvec(struct socket *sock, struct msghdr *msg,
+ rpc_fraghdr marker, struct kvec *vec, size_t base)
{
- struct msghdr msg = {
- .msg_flags = XS_SENDMSG_FLAGS | (remainder ? MSG_MORE : 0)
- };
- rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
- (u32)xdr->len);
struct kvec iov[2] = {
- {
+ [0] = {
.iov_base = &marker,
.iov_len = sizeof(marker)
},
- {
- .iov_base = xdr->head[0].iov_base,
- .iov_len = xdr->head[0].iov_len
- },
- };
- int ret;
-
- ret = kernel_sendmsg(sock, &msg, iov, 2,
- iov[0].iov_len + iov[1].iov_len);
- if (ret < 0)
- return ret;
- if (ret < iov[0].iov_len)
- return -EPIPE;
- return ret - iov[0].iov_len;
-}
-
-static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
-{
- struct msghdr msg = {
- .msg_name = addr,
- .msg_namelen = addrlen,
- .msg_flags = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0),
- };
- struct kvec iov = {
- .iov_base = vec->iov_base + base,
- .iov_len = vec->iov_len - base,
+ [1] = *vec,
};
+ size_t len = iov[0].iov_len + iov[1].iov_len;
- if (iov.iov_len != 0)
- return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
- return kernel_sendmsg(sock, &msg, NULL, 0, 0);
-}
-
-static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p)
-{
- ssize_t (*do_sendpage)(struct socket *sock, struct page *page,
- int offset, size_t size, int flags);
- struct page **ppage;
- unsigned int remainder;
- int err;
-
- remainder = xdr->page_len - base;
- base += xdr->page_base;
- ppage = xdr->pages + (base >> PAGE_SHIFT);
- base &= ~PAGE_MASK;
- do_sendpage = sock->ops->sendpage;
- if (!zerocopy)
- do_sendpage = sock_no_sendpage;
- for(;;) {
- unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder);
- int flags = XS_SENDMSG_FLAGS;
-
- remainder -= len;
- if (more)
- flags |= MSG_MORE;
- if (remainder != 0)
- flags |= MSG_SENDPAGE_NOTLAST | MSG_MORE;
- err = do_sendpage(sock, *ppage, base, len, flags);
- if (remainder == 0 || err != len)
- break;
- *sent_p += err;
- ppage++;
- base = 0;
- }
- if (err > 0) {
- *sent_p += err;
- err = 0;
- }
- return err;
+ iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2, len);
+ return xs_sendmsg(sock, msg, base);
}
/**
@@ -792,53 +799,60 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
* @addrlen: UDP only -- length of destination address
* @xdr: buffer containing this request
* @base: starting position in the buffer
- * @zerocopy: true if it is safe to use sendpage()
+ * @rm: stream record marker field
* @sent_p: return the total number of bytes successfully queued for sending
*
*/
-static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p)
+static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, rpc_fraghdr rm, int *sent_p)
{
- unsigned int remainder = xdr->len - base;
+ struct msghdr msg = {
+ .msg_name = addr,
+ .msg_namelen = addrlen,
+ .msg_flags = XS_SENDMSG_FLAGS | MSG_MORE,
+ };
+ unsigned int rmsize = rm ? sizeof(rm) : 0;
+ unsigned int remainder = rmsize + xdr->len - base;
+ unsigned int want;
int err = 0;
- int sent = 0;
if (unlikely(!sock))
return -ENOTSOCK;
- if (base != 0) {
- addr = NULL;
- addrlen = 0;
- }
-
- if (base < xdr->head[0].iov_len || addr != NULL) {
- unsigned int len = xdr->head[0].iov_len - base;
+ want = xdr->head[0].iov_len + rmsize;
+ if (base < want) {
+ unsigned int len = want - base;
remainder -= len;
- if (!base && !addr)
- err = xs_send_rm_and_kvec(sock, xdr, remainder);
+ if (remainder == 0)
+ msg.msg_flags &= ~MSG_MORE;
+ if (rmsize)
+ err = xs_send_rm_and_kvec(sock, &msg, rm,
+ &xdr->head[0], base);
else
- err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0],
- base, remainder != 0);
+ err = xs_send_kvec(sock, &msg, &xdr->head[0], base);
if (remainder == 0 || err != len)
goto out;
*sent_p += err;
base = 0;
} else
- base -= xdr->head[0].iov_len;
+ base -= want;
if (base < xdr->page_len) {
unsigned int len = xdr->page_len - base;
remainder -= len;
- err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy, &sent);
- *sent_p += sent;
- if (remainder == 0 || sent != len)
+ if (remainder == 0)
+ msg.msg_flags &= ~MSG_MORE;
+ err = xs_send_pagedata(sock, &msg, xdr, base);
+ if (remainder == 0 || err != len)
goto out;
+ *sent_p += err;
base = 0;
} else
base -= xdr->page_len;
if (base >= xdr->tail[0].iov_len)
return 0;
- err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0);
+ msg.msg_flags &= ~MSG_MORE;
+ err = xs_send_kvec(sock, &msg, &xdr->tail[0], base);
out:
if (err > 0) {
*sent_p += err;
@@ -907,6 +921,17 @@ xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req)
return transport->xmit.offset != 0 && req->rq_bytes_sent == 0;
}
+/*
+ * Return the stream record marker field for a record of length < 2^31-1
+ */
+static rpc_fraghdr
+xs_stream_record_marker(struct xdr_buf *xdr)
+{
+ if (!xdr->len)
+ return 0;
+ return cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len);
+}
+
/**
* xs_local_send_request - write an RPC request to an AF_LOCAL socket
* @req: pointer to RPC request
@@ -939,7 +964,8 @@ static int xs_local_send_request(struct rpc_rqst *req)
req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, NULL, 0, xdr,
transport->xmit.offset,
- true, &sent);
+ xs_stream_record_marker(xdr),
+ &sent);
dprintk("RPC: %s(%u) = %d\n",
__func__, xdr->len - transport->xmit.offset, status);
@@ -951,7 +977,6 @@ static int xs_local_send_request(struct rpc_rqst *req)
req->rq_bytes_sent = transport->xmit.offset;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
- req->rq_bytes_sent = 0;
transport->xmit.offset = 0;
return 0;
}
@@ -1007,7 +1032,7 @@ static int xs_udp_send_request(struct rpc_rqst *req)
req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
- xdr, 0, true, &sent);
+ xdr, 0, 0, &sent);
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
xdr->len, status);
@@ -1071,7 +1096,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
- bool zerocopy = true;
bool vm_wait = false;
int status;
int sent;
@@ -1086,12 +1110,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
xs_pktdump("packet data:",
req->rq_svec->iov_base,
req->rq_svec->iov_len);
- /* Don't use zero copy if this is a resend. If the RPC call
- * completes while the socket holds a reference to the pages,
- * then we may end up resending corrupted data.
- */
- if (req->rq_task->tk_flags & RPC_TASK_SENT)
- zerocopy = false;
if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
xs_tcp_set_socket_timeouts(xprt, transport->sock);
@@ -1104,7 +1122,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
sent = 0;
status = xs_sendpages(transport->sock, NULL, 0, xdr,
transport->xmit.offset,
- zerocopy, &sent);
+ xs_stream_record_marker(xdr),
+ &sent);
dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
xdr->len - transport->xmit.offset, status);
@@ -1115,7 +1134,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
req->rq_bytes_sent = transport->xmit.offset;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
- req->rq_bytes_sent = 0;
transport->xmit.offset = 0;
return 0;
}
@@ -1255,6 +1273,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
xprt_clear_connected(xprt);
write_unlock_bh(&sk->sk_callback_lock);
xs_sock_reset_connection_flags(xprt);
+ /* Reset stream record info */
+ xs_stream_reset_connect(transport);
mutex_unlock(&transport->recv_mutex);
trace_rpc_socket_close(xprt, sock);
@@ -1382,7 +1402,6 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
int err;
mutex_lock(&transport->recv_mutex);
- clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
sk = transport->inet;
if (sk == NULL)
goto out;
@@ -1394,6 +1413,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
consume_skb(skb);
cond_resched();
}
+ xs_poll_check_readable(transport);
out:
mutex_unlock(&transport->recv_mutex);
}
@@ -1402,7 +1422,10 @@ static void xs_udp_data_receive_workfn(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, recv_worker);
+ unsigned int pflags = memalloc_nofs_save();
+
xs_udp_data_receive(transport);
+ memalloc_nofs_restore(pflags);
}
/**
@@ -1893,7 +1916,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
sk->sk_write_space = xs_udp_write_space;
sock_set_flag(sk, SOCK_FASYNC);
sk->sk_error_report = xs_error_report;
- sk->sk_allocation = GFP_NOIO;
xprt_clear_connected(xprt);
@@ -1904,7 +1926,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
write_unlock_bh(&sk->sk_callback_lock);
}
- xs_stream_reset_connect(transport);
+ xs_stream_start_connect(transport);
return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
}
@@ -2081,7 +2103,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_data_ready = xs_data_ready;
sk->sk_write_space = xs_udp_write_space;
sock_set_flag(sk, SOCK_FASYNC);
- sk->sk_allocation = GFP_NOIO;
xprt_set_connected(xprt);
@@ -2244,7 +2265,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_write_space = xs_tcp_write_space;
sock_set_flag(sk, SOCK_FASYNC);
sk->sk_error_report = xs_error_report;
- sk->sk_allocation = GFP_NOIO;
/* socket options */
sock_reset_flag(sk, SOCK_LINGER);
@@ -2264,8 +2284,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
xs_set_memalloc(xprt);
- /* Reset TCP record info */
- xs_stream_reset_connect(transport);
+ xs_stream_start_connect(transport);
/* Tell the socket layer to start connecting... */
set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);