summaryrefslogtreecommitdiff
path: root/net/rds/tcp_recv.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/rds/tcp_recv.c')
-rw-r--r--net/rds/tcp_recv.c121
1 files changed, 57 insertions, 64 deletions
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 4fac4f2bb9dc..7997a19d1da3 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -33,6 +33,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <net/tcp.h>
+#include <trace/events/sock.h>
#include "rds.h"
#include "tcp.h"
@@ -59,50 +60,30 @@ void rds_tcp_inc_free(struct rds_incoming *inc)
/*
* this is pretty lame, but, whatever.
*/
-int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
- size_t size)
+int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
{
struct rds_tcp_incoming *tinc;
- struct iovec *iov, tmp;
struct sk_buff *skb;
- unsigned long to_copy, skb_off;
int ret = 0;
- if (size == 0)
+ if (!iov_iter_count(to))
goto out;
tinc = container_of(inc, struct rds_tcp_incoming, ti_inc);
- iov = first_iov;
- tmp = *iov;
skb_queue_walk(&tinc->ti_skb_list, skb) {
- skb_off = 0;
- while (skb_off < skb->len) {
- while (tmp.iov_len == 0) {
- iov++;
- tmp = *iov;
- }
-
- to_copy = min(tmp.iov_len, size);
+ unsigned long to_copy, skb_off;
+ for (skb_off = 0; skb_off < skb->len; skb_off += to_copy) {
+ to_copy = iov_iter_count(to);
to_copy = min(to_copy, skb->len - skb_off);
- rdsdebug("ret %d size %zu skb %p skb_off %lu "
- "skblen %d iov_base %p iov_len %zu cpy %lu\n",
- ret, size, skb, skb_off, skb->len,
- tmp.iov_base, tmp.iov_len, to_copy);
-
- /* modifies tmp as it copies */
- if (skb_copy_datagram_iovec(skb, skb_off, &tmp,
- to_copy)) {
- ret = -EFAULT;
- goto out;
- }
+ if (skb_copy_datagram_iter(skb, skb_off, to, to_copy))
+ return -EFAULT;
rds_stats_add(s_copy_to_user, to_copy);
- size -= to_copy;
ret += to_copy;
- skb_off += to_copy;
- if (size == 0)
+
+ if (!iov_iter_count(to))
goto out;
}
}
@@ -167,7 +148,7 @@ static void rds_tcp_cong_recv(struct rds_connection *conn,
}
struct rds_tcp_desc_arg {
- struct rds_connection *conn;
+ struct rds_conn_path *conn_path;
gfp_t gfp;
};
@@ -175,8 +156,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
unsigned int offset, size_t len)
{
struct rds_tcp_desc_arg *arg = desc->arg.data;
- struct rds_connection *conn = arg->conn;
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_conn_path *cp = arg->conn_path;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
struct rds_tcp_incoming *tinc = tc->t_tinc;
struct sk_buff *clone;
size_t left = len, to_copy;
@@ -191,14 +172,18 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
while (left) {
if (!tinc) {
tinc = kmem_cache_alloc(rds_tcp_incoming_slab,
- arg->gfp);
+ arg->gfp);
if (!tinc) {
desc->error = -ENOMEM;
goto out;
}
tc->t_tinc = tinc;
- rdsdebug("alloced tinc %p\n", tinc);
- rds_inc_init(&tinc->ti_inc, conn, conn->c_faddr);
+ rdsdebug("allocated tinc %p\n", tinc);
+ rds_inc_path_init(&tinc->ti_inc, cp,
+ &cp->cp_conn->c_faddr);
+ tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
+ local_clock();
+
/*
* XXX * we might be able to use the __ variants when
* we've already serialized at a higher level.
@@ -223,19 +208,20 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
/* could be 0 for a 0 len message */
tc->t_tinc_data_rem =
be32_to_cpu(tinc->ti_inc.i_hdr.h_len);
+ tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
+ local_clock();
}
}
if (left && tc->t_tinc_data_rem) {
- clone = skb_clone(skb, arg->gfp);
+ to_copy = min(tc->t_tinc_data_rem, left);
+
+ clone = pskb_extract(skb, offset, to_copy, arg->gfp);
if (!clone) {
desc->error = -ENOMEM;
goto out;
}
- to_copy = min(tc->t_tinc_data_rem, left);
- pskb_pull(clone, offset);
- pskb_trim(clone, to_copy);
skb_queue_tail(&tinc->ti_skb_list, clone);
rdsdebug("skb %p data %p len %d off %u to_copy %zu -> "
@@ -249,11 +235,14 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
}
if (tc->t_tinc_hdr_rem == 0 && tc->t_tinc_data_rem == 0) {
+ struct rds_connection *conn = cp->cp_conn;
+
if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
rds_tcp_cong_recv(conn, tinc);
else
- rds_recv_incoming(conn, conn->c_faddr,
- conn->c_laddr, &tinc->ti_inc,
+ rds_recv_incoming(conn, &conn->c_faddr,
+ &conn->c_laddr,
+ &tinc->ti_inc,
arg->gfp);
tc->t_tinc_hdr_rem = sizeof(struct rds_header);
@@ -271,15 +260,15 @@ out:
}
/* the caller has to hold the sock lock */
-static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp)
+static int rds_tcp_read_sock(struct rds_conn_path *cp, gfp_t gfp)
{
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
struct socket *sock = tc->t_sock;
read_descriptor_t desc;
struct rds_tcp_desc_arg arg;
/* It's like glib in the kernel! */
- arg.conn = conn;
+ arg.conn_path = cp;
arg.gfp = gfp;
desc.arg.data = &arg;
desc.error = 0;
@@ -299,52 +288,56 @@ static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp)
* if we fail to allocate we're in trouble.. blindly wait some time before
* trying again to see if the VM can free up something for us.
*/
-int rds_tcp_recv(struct rds_connection *conn)
+int rds_tcp_recv_path(struct rds_conn_path *cp)
{
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
struct socket *sock = tc->t_sock;
int ret = 0;
- rdsdebug("recv worker conn %p tc %p sock %p\n", conn, tc, sock);
+ rdsdebug("recv worker path [%d] tc %p sock %p\n",
+ cp->cp_index, tc, sock);
lock_sock(sock->sk);
- ret = rds_tcp_read_sock(conn, GFP_KERNEL);
+ ret = rds_tcp_read_sock(cp, GFP_KERNEL);
release_sock(sock->sk);
return ret;
}
-void rds_tcp_data_ready(struct sock *sk, int bytes)
+void rds_tcp_data_ready(struct sock *sk)
{
- void (*ready)(struct sock *sk, int bytes);
- struct rds_connection *conn;
+ void (*ready)(struct sock *sk);
+ struct rds_conn_path *cp;
struct rds_tcp_connection *tc;
- rdsdebug("data ready sk %p bytes %d\n", sk, bytes);
+ trace_sk_data_ready(sk);
+ rdsdebug("data ready sk %p\n", sk);
- read_lock(&sk->sk_callback_lock);
- conn = sk->sk_user_data;
- if (!conn) { /* check for teardown race */
+ read_lock_bh(&sk->sk_callback_lock);
+ cp = sk->sk_user_data;
+ if (!cp) { /* check for teardown race */
ready = sk->sk_data_ready;
goto out;
}
- tc = conn->c_transport_data;
+ tc = cp->cp_transport_data;
ready = tc->t_orig_data_ready;
rds_tcp_stats_inc(s_tcp_data_ready_calls);
- if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM)
- queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
+ if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) {
+ rcu_read_lock();
+ if (!rds_destroy_pending(cp->cp_conn))
+ queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+ rcu_read_unlock();
+ }
out:
- read_unlock(&sk->sk_callback_lock);
- ready(sk, bytes);
+ read_unlock_bh(&sk->sk_callback_lock);
+ ready(sk);
}
int rds_tcp_recv_init(void)
{
- rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming",
- sizeof(struct rds_tcp_incoming),
- 0, 0, NULL);
+ rds_tcp_incoming_slab = KMEM_CACHE(rds_tcp_incoming, 0);
if (!rds_tcp_incoming_slab)
return -ENOMEM;
return 0;