From 52452c542559ac980b48dbf22a30ee7fa0af507c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Mar 2015 19:04:19 -0700 Subject: inet: drop prev pointer handling in request sock When request socks are put in the ehash table, the whole notion of having a previous request to update dl_next is pointless. Also, the following patch will get rid of the big purge timer, so we want to delete a request sock without holding the listener lock. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ddd0b1f25b96..19c3770f1e97 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -458,12 +458,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } switch (sk->sk_state) { - struct request_sock *req, **prev; + struct request_sock *req; case TCP_LISTEN: if (sock_owned_by_user(sk)) goto out; - req = inet_csk_search_req(sk, &prev, th->dest, + req = inet_csk_search_req(sk, th->dest, iph->daddr, iph->saddr); if (!req) goto out; @@ -484,7 +484,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) * created socket, and POSIX does not want network * errors returned from accept(). */ - inet_csk_reqsk_queue_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); goto out; @@ -1392,15 +1392,14 @@ EXPORT_SYMBOL(tcp_v4_syn_recv_sock); static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); + struct request_sock *req; struct sock *nsk; - struct request_sock **prev; - /* Find possible connection requests. 
*/ - struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, - iph->saddr, iph->daddr); + + req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); if (req) - return tcp_check_req(sk, skb, req, prev, false); + return tcp_check_req(sk, skb, req, false); nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); -- cgit From fa76ce7328b289b6edd476e24eb52fd634261720 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Mar 2015 19:04:20 -0700 Subject: inet: get rid of central tcp/dccp listener timer One of the major issues for TCP is the SYNACK rtx handling, done by inet_csk_reqsk_queue_prune(), fired by the keepalive timer of a TCP_LISTEN socket. This function runs for awfully long times, with the socket lock held, meaning that other cpus needing this lock have to spin for hundreds of ms. SYNACKs are sent in huge bursts, likely to cause severe drops anyway. This model was OK 15 years ago when memory was very tight. We now can afford to have a timer per request sock. Timer invocations no longer need to lock the listener, and can be run from all cpus in parallel. With the following patch increasing somaxconn width to 32 bits, I tested a listener with more than 4 million active request sockets, and a steady SYNFLOOD of ~200,000 SYN per second. Host was sending ~830,000 SYNACK per second. This is ~100 times more than what we could achieve before this patch. Later, we will get rid of the listener hash and use ehash instead. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_ipv4.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 19c3770f1e97..5554b8f33d41 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -475,6 +475,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + reqsk_put(req); goto out; } @@ -486,6 +487,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) */ inet_csk_reqsk_queue_drop(sk, req); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + reqsk_put(req); goto out; case TCP_SYN_SENT: @@ -1398,8 +1400,11 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) struct sock *nsk; req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); - if (req) - return tcp_check_req(sk, skb, req, false); + if (req) { + nsk = tcp_check_req(sk, skb, req, false); + reqsk_put(req); + return nsk; + } nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); @@ -2208,7 +2213,7 @@ static void get_openreq4(const struct request_sock *req, struct seq_file *f, int i, kuid_t uid) { const struct inet_request_sock *ireq = inet_rsk(req); - long delta = req->expires - jiffies; + long delta = req->rsk_timer.expires - jiffies; seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK", -- cgit