diff options
Diffstat (limited to 'net/rds/tcp_recv.c')
| -rw-r--r-- | net/rds/tcp_recv.c | 121 |
1 files changed, 57 insertions, 64 deletions
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 4fac4f2bb9dc..7997a19d1da3 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Oracle. All rights reserved. + * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -33,6 +33,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <net/tcp.h> +#include <trace/events/sock.h> #include "rds.h" #include "tcp.h" @@ -59,50 +60,30 @@ void rds_tcp_inc_free(struct rds_incoming *inc) /* * this is pretty lame, but, whatever. */ -int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, - size_t size) +int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to) { struct rds_tcp_incoming *tinc; - struct iovec *iov, tmp; struct sk_buff *skb; - unsigned long to_copy, skb_off; int ret = 0; - if (size == 0) + if (!iov_iter_count(to)) goto out; tinc = container_of(inc, struct rds_tcp_incoming, ti_inc); - iov = first_iov; - tmp = *iov; skb_queue_walk(&tinc->ti_skb_list, skb) { - skb_off = 0; - while (skb_off < skb->len) { - while (tmp.iov_len == 0) { - iov++; - tmp = *iov; - } - - to_copy = min(tmp.iov_len, size); + unsigned long to_copy, skb_off; + for (skb_off = 0; skb_off < skb->len; skb_off += to_copy) { + to_copy = iov_iter_count(to); to_copy = min(to_copy, skb->len - skb_off); - rdsdebug("ret %d size %zu skb %p skb_off %lu " - "skblen %d iov_base %p iov_len %zu cpy %lu\n", - ret, size, skb, skb_off, skb->len, - tmp.iov_base, tmp.iov_len, to_copy); - - /* modifies tmp as it copies */ - if (skb_copy_datagram_iovec(skb, skb_off, &tmp, - to_copy)) { - ret = -EFAULT; - goto out; - } + if (skb_copy_datagram_iter(skb, skb_off, to, to_copy)) + return -EFAULT; rds_stats_add(s_copy_to_user, to_copy); - size -= to_copy; ret += to_copy; - skb_off += to_copy; - if (size == 0) + + if (!iov_iter_count(to)) goto out; } } @@ -167,7 +148,7 @@ static void rds_tcp_cong_recv(struct rds_connection *conn, } struct rds_tcp_desc_arg { - struct rds_connection *conn; + struct rds_conn_path *conn_path; gfp_t gfp; }; @@ -175,8 +156,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, unsigned int offset, size_t len) { struct rds_tcp_desc_arg *arg = desc->arg.data; - struct rds_connection *conn = arg->conn; - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_conn_path *cp = arg->conn_path; + struct rds_tcp_connection *tc = cp->cp_transport_data; struct rds_tcp_incoming *tinc = tc->t_tinc; struct sk_buff *clone; size_t left = len, to_copy; @@ -191,14 +172,18 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, while (left) { if (!tinc) { tinc = kmem_cache_alloc(rds_tcp_incoming_slab, - arg->gfp); + arg->gfp); if (!tinc) { desc->error = -ENOMEM; goto out; } tc->t_tinc = tinc; - rdsdebug("alloced tinc %p\n", tinc); - rds_inc_init(&tinc->ti_inc, conn, conn->c_faddr); + rdsdebug("allocated tinc %p\n", tinc); + rds_inc_path_init(&tinc->ti_inc, cp, + &cp->cp_conn->c_faddr); + tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] = + local_clock(); + /* * XXX * we might be able to use the __ variants when * we've already serialized at a higher level. @@ -223,19 +208,20 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, /* could be 0 for a 0 len message */ tc->t_tinc_data_rem = be32_to_cpu(tinc->ti_inc.i_hdr.h_len); + tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_START] = + local_clock(); } } if (left && tc->t_tinc_data_rem) { - clone = skb_clone(skb, arg->gfp); + to_copy = min(tc->t_tinc_data_rem, left); + + clone = pskb_extract(skb, offset, to_copy, arg->gfp); if (!clone) { desc->error = -ENOMEM; goto out; } - to_copy = min(tc->t_tinc_data_rem, left); - pskb_pull(clone, offset); - pskb_trim(clone, to_copy); skb_queue_tail(&tinc->ti_skb_list, clone); rdsdebug("skb %p data %p len %d off %u to_copy %zu -> " @@ -249,11 +235,14 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, } if (tc->t_tinc_hdr_rem == 0 && tc->t_tinc_data_rem == 0) { + struct rds_connection *conn = cp->cp_conn; + if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) rds_tcp_cong_recv(conn, tinc); else - rds_recv_incoming(conn, conn->c_faddr, - conn->c_laddr, &tinc->ti_inc, + rds_recv_incoming(conn, &conn->c_faddr, + &conn->c_laddr, + &tinc->ti_inc, arg->gfp); tc->t_tinc_hdr_rem = sizeof(struct rds_header); @@ -271,15 +260,15 @@ out: } /* the caller has to hold the sock lock */ -static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp) +static int rds_tcp_read_sock(struct rds_conn_path *cp, gfp_t gfp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; struct socket *sock = tc->t_sock; read_descriptor_t desc; struct rds_tcp_desc_arg arg; /* It's like glib in the kernel! */ - arg.conn = conn; + arg.conn_path = cp; arg.gfp = gfp; desc.arg.data = &arg; desc.error = 0; @@ -299,52 +288,56 @@ static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp) * if we fail to allocate we're in trouble.. blindly wait some time before * trying again to see if the VM can free up something for us. */ -int rds_tcp_recv(struct rds_connection *conn) +int rds_tcp_recv_path(struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; struct socket *sock = tc->t_sock; int ret = 0; - rdsdebug("recv worker conn %p tc %p sock %p\n", conn, tc, sock); + rdsdebug("recv worker path [%d] tc %p sock %p\n", + cp->cp_index, tc, sock); lock_sock(sock->sk); - ret = rds_tcp_read_sock(conn, GFP_KERNEL); + ret = rds_tcp_read_sock(cp, GFP_KERNEL); release_sock(sock->sk); return ret; } -void rds_tcp_data_ready(struct sock *sk, int bytes) +void rds_tcp_data_ready(struct sock *sk) { - void (*ready)(struct sock *sk, int bytes); - struct rds_connection *conn; + void (*ready)(struct sock *sk); + struct rds_conn_path *cp; struct rds_tcp_connection *tc; - rdsdebug("data ready sk %p bytes %d\n", sk, bytes); + trace_sk_data_ready(sk); + rdsdebug("data ready sk %p\n", sk); - read_lock(&sk->sk_callback_lock); - conn = sk->sk_user_data; - if (!conn) { /* check for teardown race */ + read_lock_bh(&sk->sk_callback_lock); + cp = sk->sk_user_data; + if (!cp) { /* check for teardown race */ ready = sk->sk_data_ready; goto out; } - tc = conn->c_transport_data; + tc = cp->cp_transport_data; ready = tc->t_orig_data_ready; rds_tcp_stats_inc(s_tcp_data_ready_calls); - if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM) - queue_delayed_work(rds_wq, &conn->c_recv_w, 0); + if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) { + rcu_read_lock(); + if (!rds_destroy_pending(cp->cp_conn)) + queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); + rcu_read_unlock(); + } out: - read_unlock(&sk->sk_callback_lock); - ready(sk, bytes); + read_unlock_bh(&sk->sk_callback_lock); + ready(sk); } int rds_tcp_recv_init(void) { - rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming", - sizeof(struct rds_tcp_incoming), - 0, 0, NULL); + rds_tcp_incoming_slab = KMEM_CACHE(rds_tcp_incoming, 0); if (!rds_tcp_incoming_slab) return -ENOMEM; return 0; |
