diff options
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 91 |
1 files changed, 62 insertions, 29 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bbb3d39c69af..6ab82e1a1d41 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -292,7 +292,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count); long sysctl_tcp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_tcp_mem); -atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ +atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; /* Current allocated memory. */ EXPORT_SYMBOL(tcp_memory_allocated); #if IS_ENABLED(CONFIG_SMC) @@ -303,7 +303,7 @@ EXPORT_SYMBOL(tcp_have_smc); /* * Current number of TCP sockets. */ -struct percpu_counter tcp_sockets_allocated; +struct percpu_counter tcp_sockets_allocated ____cacheline_aligned_in_smp; EXPORT_SYMBOL(tcp_sockets_allocated); /* @@ -456,7 +456,6 @@ void tcp_init_sock(struct sock *sk) WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); sk_sockets_allocated_inc(sk); - sk->sk_route_forced_caps = NETIF_F_GSO; } EXPORT_SYMBOL(tcp_init_sock); @@ -546,10 +545,11 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (state != TCP_SYN_SENT && (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) { int target = sock_rcvlowat(sk, 0, INT_MAX); + u16 urg_data = READ_ONCE(tp->urg_data); - if (READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) && - !sock_flag(sk, SOCK_URGINLINE) && - tp->urg_data) + if (unlikely(urg_data) && + READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) && + !sock_flag(sk, SOCK_URGINLINE)) target++; if (tcp_stream_is_readable(sk, target)) @@ -574,7 +574,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } else mask |= EPOLLOUT | EPOLLWRNORM; - if (tp->urg_data & TCP_URG_VALID) + if (urg_data & TCP_URG_VALID) mask |= EPOLLPRI; } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { /* Active TCP fastopen socket with defer_connect @@ -608,7 +608,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) unlock_sock_fast(sk, slow); break; case SIOCATMARK: - answ = tp->urg_data && + answ = READ_ONCE(tp->urg_data) && READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq); break; case SIOCOUTQ: @@ -1466,7 +1466,7 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags) char c = tp->urg_data; if (!(flags & MSG_PEEK)) - tp->urg_data = TCP_URG_READ; + WRITE_ONCE(tp->urg_data, TCP_URG_READ); /* Read urgent data. */ msg->msg_flags |= MSG_OOB; @@ -1580,6 +1580,36 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) tcp_send_ack(sk); } +void __sk_defer_free_flush(struct sock *sk) +{ + struct llist_node *head; + struct sk_buff *skb, *n; + + head = llist_del_all(&sk->defer_list); + llist_for_each_entry_safe(skb, n, head, ll_node) { + prefetch(n); + skb_mark_not_on_list(skb); + __kfree_skb(skb); + } +} +EXPORT_SYMBOL(__sk_defer_free_flush); + +static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) +{ + __skb_unlink(skb, &sk->sk_receive_queue); + if (likely(skb->destructor == sock_rfree)) { + sock_rfree(skb); + skb->destructor = NULL; + skb->sk = NULL; + if (!skb_queue_empty(&sk->sk_receive_queue) || + !llist_empty(&sk->defer_list)) { + llist_add(&skb->ll_node, &sk->defer_list); + return; + } + } + __kfree_skb(skb); +} + static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; @@ -1599,7 +1629,7 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) * splitted a fat GRO packet, while we released socket lock * in skb_splice_bits() */ - sk_eat_skb(sk, skb); + tcp_eat_recv_skb(sk, skb); } return NULL; } @@ -1633,7 +1663,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, len = skb->len - offset; /* Stop reading if we hit a patch of urgent data */ - if (tp->urg_data) { + if (unlikely(tp->urg_data)) { u32 urg_offset = tp->urg_seq - seq; if (urg_offset < len) len = urg_offset; @@ -1665,11 +1695,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, continue; } if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { - sk_eat_skb(sk, skb); + tcp_eat_recv_skb(sk, skb); ++seq; break; } - sk_eat_skb(sk, skb); + tcp_eat_recv_skb(sk, skb); if (!desc->count) break; WRITE_ONCE(tp->copied_seq, seq); @@ -2329,7 +2359,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, u32 offset; /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ - if (tp->urg_data && tp->urg_seq == *seq) { + if (unlikely(tp->urg_data) && tp->urg_seq == *seq) { if (copied) break; if (signal_pending(current)) { @@ -2372,10 +2402,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, break; if (copied) { - if (sk->sk_err || + if (!timeo || + sk->sk_err || sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || - !timeo || signal_pending(current)) break; } else { @@ -2409,13 +2439,12 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, } } - tcp_cleanup_rbuf(sk, copied); - if (copied >= target) { /* Do not sleep, just process backlog. */ - release_sock(sk); - lock_sock(sk); + __sk_flush_backlog(sk); } else { + tcp_cleanup_rbuf(sk, copied); + sk_defer_free_flush(sk); sk_wait_data(sk, &timeo, last); } @@ -2435,7 +2464,7 @@ found_ok_skb: used = len; /* Do we have urgent data here? */ - if (tp->urg_data) { + if (unlikely(tp->urg_data)) { u32 urg_offset = tp->urg_seq - *seq; if (urg_offset < used) { if (!urg_offset) { @@ -2469,8 +2498,8 @@ found_ok_skb: tcp_rcv_space_adjust(sk); skip_copy: - if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { - tp->urg_data = 0; + if (unlikely(tp->urg_data) && after(tp->copied_seq, tp->urg_seq)) { + WRITE_ONCE(tp->urg_data, 0); tcp_fast_path_check(sk); } @@ -2485,14 +2514,14 @@ skip_copy: if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; if (!(flags & MSG_PEEK)) - sk_eat_skb(sk, skb); + tcp_eat_recv_skb(sk, skb); continue; found_fin_ok: /* Process the FIN. */ WRITE_ONCE(*seq, *seq + 1); if (!(flags & MSG_PEEK)) - sk_eat_skb(sk, skb); + tcp_eat_recv_skb(sk, skb); break; } while (len > 0); @@ -2534,6 +2563,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, ret = tcp_recvmsg_locked(sk, msg, len, nonblock, flags, &tss, &cmsg_flags); release_sock(sk); + sk_defer_free_flush(sk); if (cmsg_flags && ret >= 0) { if (cmsg_flags & TCP_CMSG_TS) @@ -2964,7 +2994,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); - tp->urg_data = 0; + WRITE_ONCE(tp->urg_data, 0); tcp_write_queue_purge(sk); tcp_fastopen_active_disable_ofo_check(sk); skb_rbtree_purge(&tp->out_of_order_queue); @@ -3059,7 +3089,7 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_frag.page = NULL; sk->sk_frag.offset = 0; } - + sk_defer_free_flush(sk); sk_error_report(sk); return 0; } @@ -3774,10 +3804,12 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) tcp_get_info_chrono_stats(tp, info); info->tcpi_segs_out = tp->segs_out; - info->tcpi_segs_in = tp->segs_in; + + /* segs_in and data_segs_in can be updated from tcp_segs_in() from BH */ + info->tcpi_segs_in = READ_ONCE(tp->segs_in); + info->tcpi_data_segs_in = READ_ONCE(tp->data_segs_in); info->tcpi_min_rtt = tcp_min_rtt(tp); - info->tcpi_data_segs_in = tp->data_segs_in; info->tcpi_data_segs_out = tp->data_segs_out; info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0; @@ -4186,6 +4218,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname, &zc, &len, err); release_sock(sk); + sk_defer_free_flush(sk); if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags)) goto zerocopy_rcv_cmsg; switch (len) { |