diff options
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 88 |
1 files changed, 61 insertions, 27 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6edc441b3702..71a956fbfc55 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -302,8 +302,6 @@ EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn); long sysctl_tcp_mem[3] __read_mostly; EXPORT_IPV6_MOD(sysctl_tcp_mem); -atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; /* Current allocated memory. */ -EXPORT_IPV6_MOD(tcp_memory_allocated); DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc); EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc); @@ -1059,6 +1057,7 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) { + struct net_devmem_dmabuf_binding *binding = NULL; struct tcp_sock *tp = tcp_sk(sk); struct ubuf_info *uarg = NULL; struct sk_buff *skb; @@ -1066,11 +1065,20 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; int process_backlog = 0; + int sockc_err = 0; int zc = 0; long timeo; flags = msg->msg_flags; + sockc = (struct sockcm_cookie){ .tsflags = READ_ONCE(sk->sk_tsflags) }; + if (msg->msg_controllen) { + sockc_err = sock_cmsg_send(sk, msg, &sockc); + /* Don't return error until MSG_FASTOPEN has been processed; + * that may succeed even if the cmsg is invalid. + */ + } + if ((flags & MSG_ZEROCOPY) && size) { if (msg->msg_ubuf) { uarg = msg->msg_ubuf; @@ -1078,7 +1086,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) zc = MSG_ZEROCOPY; } else if (sock_flag(sk, SOCK_ZEROCOPY)) { skb = tcp_write_queue_tail(sk); - uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb)); + uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb), + !sockc_err && sockc.dmabuf_id); if (!uarg) { err = -ENOBUFS; goto out_err; @@ -1087,12 +1096,27 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) zc = MSG_ZEROCOPY; else uarg_to_msgzc(uarg)->zerocopy = 0; + + if (!sockc_err && sockc.dmabuf_id) { + binding = net_devmem_get_binding(sk, sockc.dmabuf_id); + if (IS_ERR(binding)) { + err = PTR_ERR(binding); + binding = NULL; + goto out_err; + } + } } } else if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES) && size) { if (sk->sk_route_caps & NETIF_F_SG) zc = MSG_SPLICE_PAGES; } + if (!sockc_err && sockc.dmabuf_id && + (!(flags & MSG_ZEROCOPY) || !sock_flag(sk, SOCK_ZEROCOPY))) { + err = -EINVAL; + goto out_err; + } + if (unlikely(flags & MSG_FASTOPEN || inet_test_bit(DEFER_CONNECT, sk)) && !tp->repair) { @@ -1131,13 +1155,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) /* 'common' sending to sendq */ } - sockc = (struct sockcm_cookie) { .tsflags = READ_ONCE(sk->sk_tsflags)}; - if (msg->msg_controllen) { - err = sock_cmsg_send(sk, msg, &sockc); - if (unlikely(err)) { - err = -EINVAL; - goto out_err; - } + if (sockc_err) { + err = sockc_err; + goto out_err; } /* This should be in poll */ @@ -1154,12 +1174,14 @@ restart: goto do_error; while (msg_data_left(msg)) { - ssize_t copy = 0; + int copy = 0; skb = tcp_write_queue_tail(sk); if (skb) copy = size_goal - skb->len; + trace_tcp_sendmsg_locked(sk, msg, skb, size_goal); + if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { bool first_skb; @@ -1256,7 +1278,8 @@ new_segment: goto wait_for_space; } - err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg); + err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg, + binding); if (err == -EMSGSIZE || err == -EEXIST) { tcp_mark_push(tp, skb); goto new_segment; @@ -1272,8 +1295,7 @@ new_segment: if (!copy) goto wait_for_space; - err = skb_splice_from_iter(skb, &msg->msg_iter, copy, - sk->sk_allocation); + err = skb_splice_from_iter(skb, &msg->msg_iter, copy); if (err < 0) { if (err == -EMSGSIZE) { tcp_mark_push(tp, skb); @@ -1337,6 +1359,8 @@ out_nopush: /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ if (uarg && !msg->msg_ubuf) net_zcopy_put(uarg); + if (binding) + net_devmem_dmabuf_binding_put(binding); return copied + copied_syn; do_error: @@ -1354,6 +1378,9 @@ out_err: sk->sk_write_space(sk); tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); } + if (binding) + net_devmem_dmabuf_binding_put(binding); + return err; } EXPORT_SYMBOL_GPL(tcp_sendmsg_locked); @@ -3407,6 +3434,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->rack.reo_wnd_persist = 0; tp->rack.dsack_seen = 0; tp->syn_data_acked = 0; + tp->syn_fastopen_child = 0; tp->rx_opt.saw_tstamp = 0; tp->rx_opt.dsack = 0; tp->rx_opt.num_sacks = 0; @@ -3723,6 +3751,19 @@ int tcp_set_window_clamp(struct sock *sk, int val) return 0; } +int tcp_sock_set_maxseg(struct sock *sk, int val) +{ + /* Values greater than interface MTU won't take effect. However + * at the point when this call is done we typically don't yet + * know which interface is going to be used + */ + if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) + return -EINVAL; + + tcp_sk(sk)->rx_opt.user_mss = val; + return 0; +} + /* * Socket option code for TCP. */ @@ -3855,15 +3896,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case TCP_MAXSEG: - /* Values greater than interface MTU won't take effect. However - * at the point when this call is done we typically don't yet - * know which interface is going to be used - */ - if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) { - err = -EINVAL; - break; - } - tp->rx_opt.user_mss = val; + err = tcp_sock_set_maxseg(sk, val); break; case TCP_NODELAY: @@ -4162,6 +4195,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_options |= TCPI_OPT_SYN_DATA; if (tp->tcp_usec_ts) info->tcpi_options |= TCPI_OPT_USEC_TS; + if (tp->syn_fastopen_child) + info->tcpi_options |= TCPI_OPT_TFO_CHILD; info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato, @@ -5020,9 +5055,8 @@ static void __init tcp_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, reordering); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs); - CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, lost_skb_hint); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint); - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 40); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 32); /* TXRX read-mostly hotpath cache lines */ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset); @@ -5194,7 +5228,7 @@ void __init tcp_init(void) /* Set per-socket limits to no more than 1/128 the pressure threshold */ limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); max_wshare = min(4UL*1024*1024, limit); - max_rshare = min(6UL*1024*1024, limit); + max_rshare = min(32UL*1024*1024, limit); init_net.ipv4.sysctl_tcp_wmem[0] = PAGE_SIZE; init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; @@ -5210,6 +5244,6 @@ void __init tcp_init(void) tcp_v4_init(); tcp_metrics_init(); BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0); - tcp_tasklet_init(); + tcp_tsq_work_init(); mptcp_init(); } |