diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-12 07:54:15 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-12 07:54:15 -0800 |
commit | ce38aa9cbed3d109355b0169b520362c409c0541 (patch) | |
tree | 621511c34edd22ac30ca12f78f0d478245b4ccd7 /net/ipv4/tcp.c | |
parent | 69973b830859bc6529a7a0468ba0d80ee5117826 (diff) | |
parent | d84701ecbcd6ad63faa7a9c18ad670d1c4d561c0 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Platform regulatory domain support for ath10k, from Bartosz
Markowski.
2) Centralize min/max MTU checking, thus removing tons of duplicated
code all of the the various drivers. From Jarod Wilson.
3) Support ingress actions in act_mirred, from Shmulik Ladkani.
4) Improve device adjacency tracking, from David Ahern.
5) Add support for LED triggers on PHY link state changes, from Zach
Brown.
6) Improve UDP socket memory accounting, from Paolo Abeni.
7) Set SK_MEM_QUANTUM to a fixed size of 4096, instead of PAGE_SIZE.
From Eric Dumazet.
8) Collapse TCP SKBs at retransmit time even if the right side SKB has
frags. Also from Eric Dumazet.
9) Add IP_RECVFRAGSIZE and IPV6_RECVFRAGSIZE cmsgs, from Willem de
Bruijn.
10) Support routing by UID, from Lorenzo Colitti.
11) Handle L3 domain binding (ie. VRF) for RAW sockets, from David
Ahern.
12) tcp_get_info() can run lockless, from Eric Dumazet.
13) 4-tuple UDP hashing in SFC driver, from Edward Cree.
14) Avoid reorders in GRO code, from Eric Dumazet.
15) IPV6 Segment Routing support, from David Lebrun.
16) Support MPLS push and pop for L3 packets in openvswitch, from Jiri
Benc.
17) Add LRU datastructure support for BPF, Martin KaFai Lau.
18) VF support in liquidio driver, from Raghu Vatsavayi.
19) Multiqueue support in alx driver, from Tobias Regnery.
20) Networking cgroup BPF support, from Daniel Mack.
21) TCP chronograph measurements, from Francis Yan.
22) XDP support for qed driver, from Yuval Mintz.
23) BPF based lwtunnels, from Thomas Graf.
24) Consistent FIB dumping to offloading drivers, from Ido Schimmel.
25) Many optimizations for UDP under high load, from Eric Dumazet.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1522 commits)
netfilter: nft_counter: rework atomic dump and reset
e1000: use disable_hardirq() for e1000_netpoll()
i40e: don't truncate match_method assignment
net: ethernet: ti: netcp: add support of cpts
net: phy: phy drivers should not set SUPPORTED_[Asym_]Pause
net: l2tp: ppp: change PPPOL2TP_MSG_* => L2TP_MSG_*
net: l2tp: deprecate PPPOL2TP_MSG_* in favour of L2TP_MSG_*
net: l2tp: export debug flags to UAPI
net: ethernet: stmmac: remove private tx queue lock
net: ethernet: sxgbe: remove private tx queue lock
net: bridge: shorten ageing time on topology change
net: bridge: add helper to set topology change
net: bridge: add helper to offload ageing time
net: nicvf: use new api ethtool_{get|set}_link_ksettings
net: ethernet: ti: cpsw: sync rates for channels in dual emac mode
net: ethernet: ti: cpsw: re-split res only when speed is changed
net: ethernet: ti: cpsw: combine budget and weight split and check
net: ethernet: ti: cpsw: don't start queue twice
net: ethernet: ti: cpsw: use same macros to get active slave
net: mvneta: select GENERIC_ALLOCATOR
...
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 116 |
1 files changed, 83 insertions, 33 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 814af89c1bd3..1ef3165114ba 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,7 +279,6 @@ #include <asm/uaccess.h> #include <asm/ioctls.h> -#include <asm/unaligned.h> #include <net/busy_poll.h> int sysctl_tcp_min_tso_segs __read_mostly = 2; @@ -405,7 +404,6 @@ void tcp_init_sock(struct sock *sk) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; - u64_stats_init(&tp->syncp); tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; tcp_enable_early_retrans(tp); @@ -665,9 +663,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now, if (tcp_should_autocork(sk, skb, size_goal)) { /* avoid atomic op if TSQ_THROTTLED bit is already set */ - if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) { + if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); - set_bit(TSQ_THROTTLED, &tp->tsq_flags); + set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); } /* It is possible TX completion already happened * before we set TSQ_THROTTLED. @@ -998,8 +996,11 @@ do_error: goto out; out_err: /* make sure we wake any epoll edge trigger waiter */ - if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) + if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && + err == -EAGAIN)) { sk->sk_write_space(sk); + tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); + } return sk_stream_error(sk, flags, err); } @@ -1333,8 +1334,11 @@ do_error: out_err: err = sk_stream_error(sk, flags, err); /* make sure we wake any epoll edge trigger waiter */ - if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) + if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && + err == -EAGAIN)) { sk->sk_write_space(sk); + tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); + } release_sock(sk); return err; } @@ -2302,7 +2306,7 @@ EXPORT_SYMBOL(tcp_disconnect); static inline bool tcp_can_repair_sock(const struct sock *sk) { return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) && - ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); + (sk->sk_state != TCP_LISTEN); } static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len) @@ -2704,15 +2708,33 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_setsockopt); #endif +static void tcp_get_info_chrono_stats(const struct tcp_sock *tp, + struct tcp_info *info) +{ + u64 stats[__TCP_CHRONO_MAX], total = 0; + enum tcp_chrono i; + + for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) { + stats[i] = tp->chrono_stat[i - 1]; + if (i == tp->chrono_type) + stats[i] += tcp_time_stamp - tp->chrono_start; + stats[i] *= USEC_PER_SEC / HZ; + total += stats[i]; + } + + info->tcpi_busy_time = total; + info->tcpi_rwnd_limited = stats[TCP_CHRONO_RWND_LIMITED]; + info->tcpi_sndbuf_limited = stats[TCP_CHRONO_SNDBUF_LIMITED]; +} + /* Return information about state of tcp endpoint in API format. */ void tcp_get_info(struct sock *sk, struct tcp_info *info) { const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp, intv; - unsigned int start; - int notsent_bytes; u64 rate64; + bool slow; u32 rate; memset(info, 0, sizeof(*info)); @@ -2721,6 +2743,27 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_state = sk_state_load(sk); + /* Report meaningful fields for all TCP states, including listeners */ + rate = READ_ONCE(sk->sk_pacing_rate); + rate64 = rate != ~0U ? rate : ~0ULL; + info->tcpi_pacing_rate = rate64; + + rate = READ_ONCE(sk->sk_max_pacing_rate); + rate64 = rate != ~0U ? rate : ~0ULL; + info->tcpi_max_pacing_rate = rate64; + + info->tcpi_reordering = tp->reordering; + info->tcpi_snd_cwnd = tp->snd_cwnd; + + if (info->tcpi_state == TCP_LISTEN) { + /* listeners aliased fields : + * tcpi_unacked -> Number of children ready for accept() + * tcpi_sacked -> max backlog + */ + info->tcpi_unacked = sk->sk_ack_backlog; + info->tcpi_sacked = sk->sk_max_ack_backlog; + return; + } info->tcpi_ca_state = icsk->icsk_ca_state; info->tcpi_retransmits = icsk->icsk_retransmits; info->tcpi_probes = icsk->icsk_probes_out; @@ -2748,13 +2791,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; - if (info->tcpi_state == TCP_LISTEN) { - info->tcpi_unacked = sk->sk_ack_backlog; - info->tcpi_sacked = sk->sk_max_ack_backlog; - } else { - info->tcpi_unacked = tp->packets_out; - info->tcpi_sacked = tp->sacked_out; - } + info->tcpi_unacked = tp->packets_out; + info->tcpi_sacked = tp->sacked_out; + info->tcpi_lost = tp->lost_out; info->tcpi_retrans = tp->retrans_out; info->tcpi_fackets = tp->fackets_out; @@ -2768,34 +2807,25 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rtt = tp->srtt_us >> 3; info->tcpi_rttvar = tp->mdev_us >> 2; info->tcpi_snd_ssthresh = tp->snd_ssthresh; - info->tcpi_snd_cwnd = tp->snd_cwnd; info->tcpi_advmss = tp->advmss; - info->tcpi_reordering = tp->reordering; info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3; info->tcpi_rcv_space = tp->rcvq_space.space; info->tcpi_total_retrans = tp->total_retrans; - rate = READ_ONCE(sk->sk_pacing_rate); - rate64 = rate != ~0U ? rate : ~0ULL; - put_unaligned(rate64, &info->tcpi_pacing_rate); + slow = lock_sock_fast(sk); - rate = READ_ONCE(sk->sk_max_pacing_rate); - rate64 = rate != ~0U ? rate : ~0ULL; - put_unaligned(rate64, &info->tcpi_max_pacing_rate); + info->tcpi_bytes_acked = tp->bytes_acked; + info->tcpi_bytes_received = tp->bytes_received; + info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt); + tcp_get_info_chrono_stats(tp, info); + + unlock_sock_fast(sk, slow); - do { - start = u64_stats_fetch_begin_irq(&tp->syncp); - put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); - put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); - } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); info->tcpi_segs_out = tp->segs_out; info->tcpi_segs_in = tp->segs_in; - notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt); - info->tcpi_notsent_bytes = max(0, notsent_bytes); - info->tcpi_min_rtt = tcp_min_rtt(tp); info->tcpi_data_segs_in = tp->data_segs_in; info->tcpi_data_segs_out = tp->data_segs_out; @@ -2806,11 +2836,31 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) if (rate && intv) { rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; do_div(rate64, intv); - put_unaligned(rate64, &info->tcpi_delivery_rate); + info->tcpi_delivery_rate = rate64; } } EXPORT_SYMBOL_GPL(tcp_get_info); +struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *stats; + struct tcp_info info; + + stats = alloc_skb(3 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC); + if (!stats) + return NULL; + + tcp_get_info_chrono_stats(tp, &info); + nla_put_u64_64bit(stats, TCP_NLA_BUSY, + info.tcpi_busy_time, TCP_NLA_PAD); + nla_put_u64_64bit(stats, TCP_NLA_RWND_LIMITED, + info.tcpi_rwnd_limited, TCP_NLA_PAD); + nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED, + info.tcpi_sndbuf_limited, TCP_NLA_PAD); + return stats; +} + static int do_tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { |