summaryrefslogtreecommitdiff
path: root/include/net/tcp.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-15 15:04:25 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-15 15:04:25 -0700
commit9a76aba02a37718242d7cdc294f0a3901928aa57 (patch)
tree2040d038f85d2120f21af83b0793efd5af1864e3 /include/net/tcp.h
parent0a957467c5fd46142bc9c52758ffc552d4c5e2f7 (diff)
parent26a1ccc6c117be8e33e0410fce8c5298b0015b99 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights: - Gustavo A. R. Silva keeps working on the implicit switch fallthru changes. - Support 802.11ax High-Efficiency wireless in cfg80211 et al, From Luca Coelho. - Re-enable ASPM in r8169, from Kai-Heng Feng. - Add virtual XFRM interfaces, which avoids all of the limitations of existing IPSEC tunnels. From Steffen Klassert. - Convert GRO over to use a hash table, so that when we have many flows active we don't traverse a long list during accumluation. - Many new self tests for routing, TC, tunnels, etc. Too many contributors to mention them all, but I'm really happy to keep seeing this stuff. - Hardware timestamping support for dpaa_eth/fsl-fman from Yangbo Lu. - Lots of cleanups and fixes in L2TP code from Guillaume Nault. - Add IPSEC offload support to netdevsim, from Shannon Nelson. - Add support for slotting with non-uniform distribution to netem packet scheduler, from Yousuk Seung. - Add UDP GSO support to mlx5e, from Boris Pismenny. - Support offloading of Team LAG in NFP, from John Hurley. - Allow to configure TX queue selection based upon RX queue, from Amritha Nambiar. - Support ethtool ring size configuration in aquantia, from Anton Mikaev. - Support DSCP and flowlabel per-transport in SCTP, from Xin Long. - Support list based batching and stack traversal of SKBs, this is very exciting work. From Edward Cree. - Busyloop optimizations in vhost_net, from Toshiaki Makita. - Introduce the ETF qdisc, which allows time based transmissions. IGB can offload this in hardware. From Vinicius Costa Gomes. - Add parameter support to devlink, from Moshe Shemesh. - Several multiplication and division optimizations for BPF JIT in nfp driver, from Jiong Wang. - Lots of prepatory work to make more of the packet scheduler layer lockless, when possible, from Vlad Buslov. - Add ACK filter and NAT awareness to sch_cake packet scheduler, from Toke Høiland-Jørgensen. - Support regions and region snapshots in devlink, from Alex Vesker. - Allow to attach XDP programs to both HW and SW at the same time on a given device, with initial support in nfp. From Jakub Kicinski. - Add TLS RX offload and support in mlx5, from Ilya Lesokhin. - Use PHYLIB in r8169 driver, from Heiner Kallweit. - All sorts of changes to support Spectrum 2 in mlxsw driver, from Ido Schimmel. - PTP support in mv88e6xxx DSA driver, from Andrew Lunn. - Make TCP_USER_TIMEOUT socket option more accurate, from Jon Maxwell. - Support for templates in packet scheduler classifier, from Jiri Pirko. - IPV6 support in RDS, from Ka-Cheong Poon. - Native tproxy support in nf_tables, from Máté Eckl. - Maintain IP fragment queue in an rbtree, but optimize properly for in-order frags. From Peter Oskolkov. - Improvde handling of ACKs on hole repairs, from Yuchung Cheng" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1996 commits) bpf: test: fix spelling mistake "REUSEEPORT" -> "REUSEPORT" hv/netvsc: Fix NULL dereference at single queue mode fallback net: filter: mark expected switch fall-through xen-netfront: fix warn message as irq device name has '/' cxgb4: Add new T5 PCI device ids 0x50af and 0x50b0 net: dsa: mv88e6xxx: missing unlock on error path rds: fix building with IPV6=m inet/connection_sock: prefer _THIS_IP_ to current_text_addr net: dsa: mv88e6xxx: bitwise vs logical bug net: sock_diag: Fix spectre v1 gadget in __sock_diag_cmd() ieee802154: hwsim: using right kind of iteration net: hns3: Add vlan filter setting by ethtool command -K net: hns3: Set tx ring' tc info when netdev is up net: hns3: Remove tx ring BD len register in hns3_enet net: hns3: Fix desc num set to default when setting channel net: hns3: Fix for phy link issue when using marvell phy driver net: hns3: Fix for information of phydev lost problem when down/up net: hns3: Fix for command format parsing error in hclge_is_all_function_id_zero net: hns3: Add support for serdes loopback selftest bnxt_en: take coredump_record structure off stack ...
Diffstat (limited to 'include/net/tcp.h')
-rw-r--r--include/net/tcp.h58
1 files changed, 50 insertions, 8 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index cd3ecda9386a..d196901c9dba 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -36,6 +36,7 @@
#include <net/inet_hashtables.h>
#include <net/checksum.h>
#include <net/request_sock.h>
+#include <net/sock_reuseport.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ip.h>
@@ -473,19 +474,45 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
*/
static inline void tcp_synq_overflow(const struct sock *sk)
{
- unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
- unsigned long now = jiffies;
+ unsigned int last_overflow;
+ unsigned int now = jiffies;
- if (time_after(now, last_overflow + HZ))
+ if (sk->sk_reuseport) {
+ struct sock_reuseport *reuse;
+
+ reuse = rcu_dereference(sk->sk_reuseport_cb);
+ if (likely(reuse)) {
+ last_overflow = READ_ONCE(reuse->synq_overflow_ts);
+ if (time_after32(now, last_overflow + HZ))
+ WRITE_ONCE(reuse->synq_overflow_ts, now);
+ return;
+ }
+ }
+
+ last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+ if (time_after32(now, last_overflow + HZ))
tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
}
/* syncookies: no recent synqueue overflow on this listening socket? */
static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
{
- unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+ unsigned int last_overflow;
+ unsigned int now = jiffies;
+
+ if (sk->sk_reuseport) {
+ struct sock_reuseport *reuse;
+
+ reuse = rcu_dereference(sk->sk_reuseport_cb);
+ if (likely(reuse)) {
+ last_overflow = READ_ONCE(reuse->synq_overflow_ts);
+ return time_after32(now, last_overflow +
+ TCP_SYNCOOKIE_VALID);
+ }
+ }
- return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID);
+ last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+ return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
}
static inline u32 tcp_cookie_time(void)
@@ -963,6 +990,8 @@ struct rate_sample {
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
s32 delivered; /* number of packets delivered over interval */
long interval_us; /* time for tp->delivered to incr "delivered" */
+ u32 snd_interval_us; /* snd interval for delivered packets */
+ u32 rcv_interval_us; /* rcv interval for delivered packets */
long rtt_us; /* RTT of last (S)ACKed packet (or -1) */
int losses; /* number of packets marked lost upon ACK */
u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */
@@ -1194,6 +1223,17 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
return tp->is_cwnd_limited;
}
+/* BBR congestion control needs pacing.
+ * Same remark for SO_MAX_PACING_RATE.
+ * sch_fq packet scheduler is efficiently handling pacing,
+ * but is not always installed/used.
+ * Return true if TCP stack should pace packets itself.
+ */
+static inline bool tcp_needs_internal_pacing(const struct sock *sk)
+{
+ return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
+}
+
/* Something is really bad, we could not queue an additional packet,
* because qdisc is full or receiver sent a 0 window.
* We do not want to add fuel to the fire, or abort too early,
@@ -1371,7 +1411,8 @@ static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
{
if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
return true;
- if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
+ if (unlikely(!time_before32(ktime_get_seconds(),
+ rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)))
return true;
/*
* Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
@@ -1401,7 +1442,8 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
However, we can relax time bounds for RST segments to MSL.
*/
- if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
+ if (rst && !time_before32(ktime_get_seconds(),
+ rx_opt->ts_recent_stamp + TCP_PAWS_MSL))
return false;
return true;
}
@@ -1787,7 +1829,7 @@ void tcp_v4_destroy_sock(struct sock *sk);
struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
netdev_features_t features);
-struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
int tcp_gro_complete(struct sk_buff *skb);
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);