summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c88
1 files changed, 61 insertions, 27 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6edc441b3702..71a956fbfc55 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -302,8 +302,6 @@ EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
long sysctl_tcp_mem[3] __read_mostly;
EXPORT_IPV6_MOD(sysctl_tcp_mem);
-atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; /* Current allocated memory. */
-EXPORT_IPV6_MOD(tcp_memory_allocated);
DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc);
@@ -1059,6 +1057,7 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
{
+ struct net_devmem_dmabuf_binding *binding = NULL;
struct tcp_sock *tp = tcp_sk(sk);
struct ubuf_info *uarg = NULL;
struct sk_buff *skb;
@@ -1066,11 +1065,20 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
int process_backlog = 0;
+ int sockc_err = 0;
int zc = 0;
long timeo;
flags = msg->msg_flags;
+ sockc = (struct sockcm_cookie){ .tsflags = READ_ONCE(sk->sk_tsflags) };
+ if (msg->msg_controllen) {
+ sockc_err = sock_cmsg_send(sk, msg, &sockc);
+ /* Don't return error until MSG_FASTOPEN has been processed;
+ * that may succeed even if the cmsg is invalid.
+ */
+ }
+
if ((flags & MSG_ZEROCOPY) && size) {
if (msg->msg_ubuf) {
uarg = msg->msg_ubuf;
@@ -1078,7 +1086,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
zc = MSG_ZEROCOPY;
} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
skb = tcp_write_queue_tail(sk);
- uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
+ uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb),
+ !sockc_err && sockc.dmabuf_id);
if (!uarg) {
err = -ENOBUFS;
goto out_err;
@@ -1087,12 +1096,27 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
zc = MSG_ZEROCOPY;
else
uarg_to_msgzc(uarg)->zerocopy = 0;
+
+ if (!sockc_err && sockc.dmabuf_id) {
+ binding = net_devmem_get_binding(sk, sockc.dmabuf_id);
+ if (IS_ERR(binding)) {
+ err = PTR_ERR(binding);
+ binding = NULL;
+ goto out_err;
+ }
+ }
}
} else if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES) && size) {
if (sk->sk_route_caps & NETIF_F_SG)
zc = MSG_SPLICE_PAGES;
}
+ if (!sockc_err && sockc.dmabuf_id &&
+ (!(flags & MSG_ZEROCOPY) || !sock_flag(sk, SOCK_ZEROCOPY))) {
+ err = -EINVAL;
+ goto out_err;
+ }
+
if (unlikely(flags & MSG_FASTOPEN ||
inet_test_bit(DEFER_CONNECT, sk)) &&
!tp->repair) {
@@ -1131,13 +1155,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
/* 'common' sending to sendq */
}
- sockc = (struct sockcm_cookie) { .tsflags = READ_ONCE(sk->sk_tsflags)};
- if (msg->msg_controllen) {
- err = sock_cmsg_send(sk, msg, &sockc);
- if (unlikely(err)) {
- err = -EINVAL;
- goto out_err;
- }
+ if (sockc_err) {
+ err = sockc_err;
+ goto out_err;
}
/* This should be in poll */
@@ -1154,12 +1174,14 @@ restart:
goto do_error;
while (msg_data_left(msg)) {
- ssize_t copy = 0;
+ int copy = 0;
skb = tcp_write_queue_tail(sk);
if (skb)
copy = size_goal - skb->len;
+ trace_tcp_sendmsg_locked(sk, msg, skb, size_goal);
+
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
@@ -1256,7 +1278,8 @@ new_segment:
goto wait_for_space;
}
- err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
+ err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg,
+ binding);
if (err == -EMSGSIZE || err == -EEXIST) {
tcp_mark_push(tp, skb);
goto new_segment;
@@ -1272,8 +1295,7 @@ new_segment:
if (!copy)
goto wait_for_space;
- err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
- sk->sk_allocation);
+ err = skb_splice_from_iter(skb, &msg->msg_iter, copy);
if (err < 0) {
if (err == -EMSGSIZE) {
tcp_mark_push(tp, skb);
@@ -1337,6 +1359,8 @@ out_nopush:
/* msg->msg_ubuf is pinned by the caller so we don't take extra refs */
if (uarg && !msg->msg_ubuf)
net_zcopy_put(uarg);
+ if (binding)
+ net_devmem_dmabuf_binding_put(binding);
return copied + copied_syn;
do_error:
@@ -1354,6 +1378,9 @@ out_err:
sk->sk_write_space(sk);
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
}
+ if (binding)
+ net_devmem_dmabuf_binding_put(binding);
+
return err;
}
EXPORT_SYMBOL_GPL(tcp_sendmsg_locked);
@@ -3407,6 +3434,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->rack.reo_wnd_persist = 0;
tp->rack.dsack_seen = 0;
tp->syn_data_acked = 0;
+ tp->syn_fastopen_child = 0;
tp->rx_opt.saw_tstamp = 0;
tp->rx_opt.dsack = 0;
tp->rx_opt.num_sacks = 0;
@@ -3723,6 +3751,19 @@ int tcp_set_window_clamp(struct sock *sk, int val)
return 0;
}
+int tcp_sock_set_maxseg(struct sock *sk, int val)
+{
+ /* Values greater than interface MTU won't take effect. However
+ * at the point when this call is done we typically don't yet
+ * know which interface is going to be used
+ */
+ if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW))
+ return -EINVAL;
+
+ tcp_sk(sk)->rx_opt.user_mss = val;
+ return 0;
+}
+
/*
* Socket option code for TCP.
*/
@@ -3855,15 +3896,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case TCP_MAXSEG:
- /* Values greater than interface MTU won't take effect. However
- * at the point when this call is done we typically don't yet
- * know which interface is going to be used
- */
- if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
- err = -EINVAL;
- break;
- }
- tp->rx_opt.user_mss = val;
+ err = tcp_sock_set_maxseg(sk, val);
break;
case TCP_NODELAY:
@@ -4162,6 +4195,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_options |= TCPI_OPT_SYN_DATA;
if (tp->tcp_usec_ts)
info->tcpi_options |= TCPI_OPT_USEC_TS;
+ if (tp->syn_fastopen_child)
+ info->tcpi_options |= TCPI_OPT_TFO_CHILD;
info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato,
@@ -5020,9 +5055,8 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, reordering);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs);
- CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, lost_skb_hint);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 40);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 32);
/* TXRX read-mostly hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset);
@@ -5194,7 +5228,7 @@ void __init tcp_init(void)
/* Set per-socket limits to no more than 1/128 the pressure threshold */
limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
max_wshare = min(4UL*1024*1024, limit);
- max_rshare = min(6UL*1024*1024, limit);
+ max_rshare = min(32UL*1024*1024, limit);
init_net.ipv4.sysctl_tcp_wmem[0] = PAGE_SIZE;
init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
@@ -5210,6 +5244,6 @@ void __init tcp_init(void)
tcp_v4_init();
tcp_metrics_init();
BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
- tcp_tasklet_init();
+ tcp_tsq_work_init();
mptcp_init();
}