summaryrefslogtreecommitdiff
path: root/net/rds/tcp_send.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/rds/tcp_send.c')
-rw-r--r--net/rds/tcp_send.c103
1 files changed, 57 insertions, 46 deletions
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 81cf5a4c5e40..7d284ac7e81a 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -34,53 +34,46 @@
#include <linux/in.h>
#include <net/tcp.h>
+#include "rds_single_path.h"
#include "rds.h"
#include "tcp.h"
-static void rds_tcp_cork(struct socket *sock, int val)
+void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp)
{
- mm_segment_t oldfs;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
- oldfs = get_fs();
- set_fs(KERNEL_DS);
- sock->ops->setsockopt(sock, SOL_TCP, TCP_CORK, (char __user *)&val,
- sizeof(val));
- set_fs(oldfs);
+ tcp_sock_set_cork(tc->t_sock->sk, true);
}
-void rds_tcp_xmit_prepare(struct rds_connection *conn)
+void rds_tcp_xmit_path_complete(struct rds_conn_path *cp)
{
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
- rds_tcp_cork(tc->t_sock, 1);
-}
-
-void rds_tcp_xmit_complete(struct rds_connection *conn)
-{
- struct rds_tcp_connection *tc = conn->c_transport_data;
-
- rds_tcp_cork(tc->t_sock, 0);
+ tcp_sock_set_cork(tc->t_sock->sk, false);
}
/* the core send_sem serializes this with other xmit and shutdown */
static int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len)
{
struct kvec vec = {
- .iov_base = data,
- .iov_len = len,
+ .iov_base = data,
+ .iov_len = len,
+ };
+ struct msghdr msg = {
+ .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL,
};
- struct msghdr msg = {
- .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL,
- };
return kernel_sendmsg(sock, &msg, &vec, 1, vec.iov_len);
}
/* the core send_sem serializes this with other xmit and shutdown */
int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
- unsigned int hdr_off, unsigned int sg, unsigned int off)
+ unsigned int hdr_off, unsigned int sg, unsigned int off)
{
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_conn_path *cp = rm->m_inc.i_conn_path;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
+ struct msghdr msg = {};
+ struct bio_vec bvec;
int done = 0;
int ret = 0;
@@ -89,16 +82,19 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
* m_ack_seq is set to the sequence number of the last byte of
* header and data. see rds_tcp_is_acked().
*/
- tc->t_last_sent_nxt = rds_tcp_snd_nxt(tc);
+ tc->t_last_sent_nxt = rds_tcp_write_seq(tc);
rm->m_ack_seq = tc->t_last_sent_nxt +
sizeof(struct rds_header) +
be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1;
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
set_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags);
tc->t_last_expected_una = rm->m_ack_seq + 1;
+ if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
+ rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
+
rdsdebug("rm %p tcp nxt %u ack_seq %llu\n",
- rm, rds_tcp_snd_nxt(tc),
+ rm, rds_tcp_write_seq(tc),
(unsigned long long)rm->m_ack_seq);
}
@@ -117,11 +113,16 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
}
while (sg < rm->data.op_nents) {
- ret = tc->t_sock->ops->sendpage(tc->t_sock,
- sg_page(&rm->data.op_sg[sg]),
- rm->data.op_sg[sg].offset + off,
- rm->data.op_sg[sg].length - off,
- MSG_DONTWAIT|MSG_NOSIGNAL);
+ msg.msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT | MSG_NOSIGNAL;
+ if (sg + 1 < rm->data.op_nents)
+ msg.msg_flags |= MSG_MORE;
+
+ bvec_set_page(&bvec, sg_page(&rm->data.op_sg[sg]),
+ rm->data.op_sg[sg].length - off,
+ rm->data.op_sg[sg].offset + off);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1,
+ rm->data.op_sg[sg].length - off);
+ ret = sock_sendmsg(tc->t_sock, &msg);
rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]),
rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off,
ret);
@@ -143,10 +144,17 @@ out:
rds_tcp_stats_inc(s_tcp_sndbuf_full);
ret = 0;
} else {
- printk(KERN_WARNING "RDS/tcp: send to %pI4 "
- "returned %d, disconnecting and reconnecting\n",
- &conn->c_faddr, ret);
- rds_conn_drop(conn);
+ /* No need to disconnect/reconnect if path_drop
+ * has already been triggered, because, e.g., of
+ * an incoming RST.
+ */
+ if (rds_conn_path_up(cp)) {
+ pr_warn("RDS/tcp: send to %pI6c on cp [%d]"
+ "returned %d, "
+ "disconnecting and reconnecting\n",
+ &conn->c_faddr, cp->cp_index, ret);
+ rds_conn_path_drop(cp, false);
+ }
}
}
if (done == 0)
@@ -171,30 +179,33 @@ static int rds_tcp_is_acked(struct rds_message *rm, uint64_t ack)
void rds_tcp_write_space(struct sock *sk)
{
void (*write_space)(struct sock *sk);
- struct rds_connection *conn;
+ struct rds_conn_path *cp;
struct rds_tcp_connection *tc;
- read_lock(&sk->sk_callback_lock);
- conn = sk->sk_user_data;
- if (!conn) {
+ read_lock_bh(&sk->sk_callback_lock);
+ cp = sk->sk_user_data;
+ if (!cp) {
write_space = sk->sk_write_space;
goto out;
}
- tc = conn->c_transport_data;
+ tc = cp->cp_transport_data;
rdsdebug("write_space for tc %p\n", tc);
write_space = tc->t_orig_write_space;
rds_tcp_stats_inc(s_tcp_write_space_calls);
rdsdebug("tcp una %u\n", rds_tcp_snd_una(tc));
tc->t_last_seen_una = rds_tcp_snd_una(tc);
- rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked);
+ rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
- if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
- queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+ rcu_read_lock();
+ if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf &&
+ !rds_destroy_pending(cp->cp_conn))
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
+ rcu_read_unlock();
out:
- read_unlock(&sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
/*
* write_space is only called when data leaves tcp's send queue if