summary refs log tree commit diff
path: root/net/rds
diff options
context:
space:
mode:
Diffstat (limited to 'net/rds')
-rw-r--r-- net/rds/af_rds.c 95
-rw-r--r-- net/rds/ib_recv.c 23
-rw-r--r-- net/rds/rds.h 11
-rw-r--r-- net/rds/recv.c 22
-rw-r--r-- net/rds/send.c 12
-rw-r--r-- net/rds/stats.c 3
6 files changed, 145 insertions, 21 deletions
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 2b969f99ef13..2977137c28eb 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -705,7 +705,7 @@ static int rds_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_SEQPACKET || protocol)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern);
+ sk = sk_alloc(net, AF_RDS, GFP_KERNEL, &rds_proto, kern);
if (!sk)
return -ENOMEM;
@@ -741,6 +741,10 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
spin_lock_bh(&rds_sock_lock);
list_for_each_entry(rs, &rds_sock_list, rs_item) {
+ /* This option only supports IPv4 sockets. */
+ if (!ipv6_addr_v4mapped(&rs->rs_bound_addr))
+ continue;
+
read_lock(&rs->rs_recv_lock);
/* XXX too lazy to maintain counts.. */
@@ -762,21 +766,60 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
lens->each = sizeof(struct rds_info_message);
}
+#if IS_ENABLED(CONFIG_IPV6)
+static void rds6_sock_inc_info(struct socket *sock, unsigned int len,
+ struct rds_info_iterator *iter,
+ struct rds_info_lengths *lens)
+{
+ struct rds_incoming *inc;
+ unsigned int total = 0;
+ struct rds_sock *rs;
+
+ len /= sizeof(struct rds6_info_message);
+
+ spin_lock_bh(&rds_sock_lock);
+
+ list_for_each_entry(rs, &rds_sock_list, rs_item) {
+ read_lock(&rs->rs_recv_lock);
+
+ list_for_each_entry(inc, &rs->rs_recv_queue, i_item) {
+ total++;
+ if (total <= len)
+ rds6_inc_info_copy(inc, iter, &inc->i_saddr,
+ &rs->rs_bound_addr, 1);
+ }
+
+ read_unlock(&rs->rs_recv_lock);
+ }
+
+ spin_unlock_bh(&rds_sock_lock);
+
+ lens->nr = total;
+ lens->each = sizeof(struct rds6_info_message);
+}
+#endif
+
static void rds_sock_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
struct rds_info_socket sinfo;
+ unsigned int cnt = 0;
struct rds_sock *rs;
len /= sizeof(struct rds_info_socket);
spin_lock_bh(&rds_sock_lock);
- if (len < rds_sock_count)
+ if (len < rds_sock_count) {
+ cnt = rds_sock_count;
goto out;
+ }
list_for_each_entry(rs, &rds_sock_list, rs_item) {
+ /* This option only supports IPv4 sockets. */
+ if (!ipv6_addr_v4mapped(&rs->rs_bound_addr))
+ continue;
sinfo.sndbuf = rds_sk_sndbuf(rs);
sinfo.rcvbuf = rds_sk_rcvbuf(rs);
sinfo.bound_addr = rs->rs_bound_addr_v4;
@@ -786,15 +829,51 @@ static void rds_sock_info(struct socket *sock, unsigned int len,
sinfo.inum = sock_i_ino(rds_rs_to_sk(rs));
rds_info_copy(iter, &sinfo, sizeof(sinfo));
+ cnt++;
}
out:
- lens->nr = rds_sock_count;
+ lens->nr = cnt;
lens->each = sizeof(struct rds_info_socket);
spin_unlock_bh(&rds_sock_lock);
}
+#if IS_ENABLED(CONFIG_IPV6)
+static void rds6_sock_info(struct socket *sock, unsigned int len,
+ struct rds_info_iterator *iter,
+ struct rds_info_lengths *lens)
+{
+ struct rds6_info_socket sinfo6;
+ struct rds_sock *rs;
+
+ len /= sizeof(struct rds6_info_socket);
+
+ spin_lock_bh(&rds_sock_lock);
+
+ if (len < rds_sock_count)
+ goto out;
+
+ list_for_each_entry(rs, &rds_sock_list, rs_item) {
+ sinfo6.sndbuf = rds_sk_sndbuf(rs);
+ sinfo6.rcvbuf = rds_sk_rcvbuf(rs);
+ sinfo6.bound_addr = rs->rs_bound_addr;
+ sinfo6.connected_addr = rs->rs_conn_addr;
+ sinfo6.bound_port = rs->rs_bound_port;
+ sinfo6.connected_port = rs->rs_conn_port;
+ sinfo6.inum = sock_i_ino(rds_rs_to_sk(rs));
+
+ rds_info_copy(iter, &sinfo6, sizeof(sinfo6));
+ }
+
+ out:
+ lens->nr = rds_sock_count;
+ lens->each = sizeof(struct rds6_info_socket);
+
+ spin_unlock_bh(&rds_sock_lock);
+}
+#endif
+
static void rds_exit(void)
{
sock_unregister(rds_family_ops.family);
@@ -808,6 +887,10 @@ static void rds_exit(void)
rds_bind_lock_destroy();
rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info);
rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
+#if IS_ENABLED(CONFIG_IPV6)
+ rds_info_deregister_func(RDS6_INFO_SOCKETS, rds6_sock_info);
+ rds_info_deregister_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info);
+#endif
}
module_exit(rds_exit);
@@ -845,6 +928,10 @@ static int rds_init(void)
rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info);
rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
+#if IS_ENABLED(CONFIG_IPV6)
+ rds_info_register_func(RDS6_INFO_SOCKETS, rds6_sock_info);
+ rds_info_register_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info);
+#endif
goto out;
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 3cae88cbdaa0..a0f99bbf362c 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -385,6 +385,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
unsigned int posted = 0;
int ret = 0;
bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
+ bool must_wake = false;
u32 pos;
/* the goal here is to just make sure that someone, somewhere
@@ -405,6 +406,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
recv = &ic->i_recvs[pos];
ret = rds_ib_recv_refill_one(conn, recv, gfp);
if (ret) {
+ must_wake = true;
break;
}
@@ -423,6 +425,11 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
}
posted++;
+
+ if ((posted > 128 && need_resched()) || posted > 8192) {
+ must_wake = true;
+ break;
+ }
}
/* We're doing flow control - update the window. */
@@ -445,10 +452,13 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
* if we should requeue.
*/
if (rds_conn_up(conn) &&
- ((can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
+ (must_wake ||
+ (can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
rds_ib_ring_empty(&ic->i_recv_ring))) {
queue_delayed_work(rds_wq, &conn->c_recv_w, 1);
}
+ if (can_wait)
+ cond_resched();
}
/*
@@ -1038,9 +1048,14 @@ int rds_ib_recv_init(void)
si_meminfo(&si);
rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;
- rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
- sizeof(struct rds_ib_incoming),
- 0, SLAB_HWCACHE_ALIGN, NULL);
+ rds_ib_incoming_slab =
+ kmem_cache_create_usercopy("rds_ib_incoming",
+ sizeof(struct rds_ib_incoming),
+ 0, SLAB_HWCACHE_ALIGN,
+ offsetof(struct rds_ib_incoming,
+ ii_inc.i_usercopy),
+ sizeof(struct rds_inc_usercopy),
+ NULL);
if (!rds_ib_incoming_slab)
goto out;
diff --git a/net/rds/rds.h b/net/rds/rds.h
index f0066d168499..53e86911773a 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -271,6 +271,12 @@ struct rds_ext_header_rdma_dest {
#define RDS_MSG_RX_END 2
#define RDS_MSG_RX_CMSG 3
+/* The following values are whitelisted for usercopy */
+struct rds_inc_usercopy {
+ rds_rdma_cookie_t rdma_cookie;
+ ktime_t rx_tstamp;
+};
+
struct rds_incoming {
refcount_t i_refcount;
struct list_head i_item;
@@ -280,8 +286,7 @@ struct rds_incoming {
unsigned long i_rx_jiffies;
struct in6_addr i_saddr;
- rds_rdma_cookie_t i_rdma_cookie;
- ktime_t i_rx_tstamp;
+ struct rds_inc_usercopy i_usercopy;
u64 i_rx_lat_trace[RDS_RX_MAX_TRACES];
};
@@ -717,7 +722,7 @@ struct rds_statistics {
uint64_t s_cong_send_blocked;
uint64_t s_recv_bytes_added_to_socket;
uint64_t s_recv_bytes_removed_from_socket;
-
+ uint64_t s_send_stuck_rm;
};
/* af_rds.c */
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 853de4876088..7e451c82595b 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -47,8 +47,8 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
INIT_LIST_HEAD(&inc->i_item);
inc->i_conn = conn;
inc->i_saddr = *saddr;
- inc->i_rdma_cookie = 0;
- inc->i_rx_tstamp = ktime_set(0, 0);
+ inc->i_usercopy.rdma_cookie = 0;
+ inc->i_usercopy.rx_tstamp = ktime_set(0, 0);
memset(inc->i_rx_lat_trace, 0, sizeof(inc->i_rx_lat_trace));
}
@@ -62,8 +62,8 @@ void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp,
inc->i_conn = cp->cp_conn;
inc->i_conn_path = cp;
inc->i_saddr = *saddr;
- inc->i_rdma_cookie = 0;
- inc->i_rx_tstamp = ktime_set(0, 0);
+ inc->i_usercopy.rdma_cookie = 0;
+ inc->i_usercopy.rx_tstamp = ktime_set(0, 0);
}
EXPORT_SYMBOL_GPL(rds_inc_path_init);
@@ -186,7 +186,7 @@ static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock
case RDS_EXTHDR_RDMA_DEST:
/* We ignore the size for now. We could stash it
* somewhere and use it for error checking. */
- inc->i_rdma_cookie = rds_rdma_make_cookie(
+ inc->i_usercopy.rdma_cookie = rds_rdma_make_cookie(
be32_to_cpu(buffer.rdma_dest.h_rdma_rkey),
be32_to_cpu(buffer.rdma_dest.h_rdma_offset));
@@ -380,7 +380,7 @@ void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
be32_to_cpu(inc->i_hdr.h_len),
inc->i_hdr.h_dport);
if (sock_flag(sk, SOCK_RCVTSTAMP))
- inc->i_rx_tstamp = ktime_get_real();
+ inc->i_usercopy.rx_tstamp = ktime_get_real();
rds_inc_addref(inc);
inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock();
list_add_tail(&inc->i_item, &rs->rs_recv_queue);
@@ -540,16 +540,18 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
{
int ret = 0;
- if (inc->i_rdma_cookie) {
+ if (inc->i_usercopy.rdma_cookie) {
ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST,
- sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie);
+ sizeof(inc->i_usercopy.rdma_cookie),
+ &inc->i_usercopy.rdma_cookie);
if (ret)
goto out;
}
- if ((inc->i_rx_tstamp != 0) &&
+ if ((inc->i_usercopy.rx_tstamp != 0) &&
sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) {
- struct __kernel_old_timeval tv = ns_to_kernel_old_timeval(inc->i_rx_tstamp);
+ struct __kernel_old_timeval tv =
+ ns_to_kernel_old_timeval(inc->i_usercopy.rx_tstamp);
if (!sock_flag(rds_rs_to_sk(rs), SOCK_TSTAMP_NEW)) {
ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
diff --git a/net/rds/send.c b/net/rds/send.c
index 031b1e97a466..9ce552abf9e9 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -145,6 +145,7 @@ int rds_send_xmit(struct rds_conn_path *cp)
LIST_HEAD(to_be_dropped);
int batch_count;
unsigned long send_gen = 0;
+ int same_rm = 0;
restart:
batch_count = 0;
@@ -200,6 +201,17 @@ restart:
rm = cp->cp_xmit_rm;
+ if (!rm) {
+ same_rm = 0;
+ } else {
+ same_rm++;
+ if (same_rm >= 4096) {
+ rds_stats_inc(s_send_stuck_rm);
+ ret = -EAGAIN;
+ break;
+ }
+ }
+
/*
* If between sending messages, we can send a pending congestion
* map update.
diff --git a/net/rds/stats.c b/net/rds/stats.c
index 73be187d389e..9e87da43c004 100644
--- a/net/rds/stats.c
+++ b/net/rds/stats.c
@@ -76,6 +76,9 @@ static const char *const rds_stat_names[] = {
"cong_update_received",
"cong_send_error",
"cong_send_blocked",
+ "recv_bytes_added_to_sock",
+ "recv_bytes_freed_fromsock",
+ "send_stuck_rm",
};
void rds_stats_info_copy(struct rds_info_iterator *iter,