From 4ce7e93cb3fe87db5b700050172dc41def9834b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Apr 2016 08:52:22 -0700 Subject: tcp: rate limit ACK sent by SYN_RECV request sockets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Attackers like to use SYNFLOOD targeting one 5-tuple, as they hit a single RX queue (and cpu) on the victim. If they use random sequence numbers in their SYN, we detect they do not match the expected window and send back an ACK. This patch adds a rate limitation, so that the effect of such attacks is limited to ingress only. We roughly double our ability to absorb such attacks. Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Neal Cardwell Cc: Maciej Żenczykowski Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp_minisocks.c') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index acb366dd61e6..4c53e7c86586 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -704,7 +704,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) { /* Out of window: send ACK and drop. */ - if (!(flg & TCP_FLAG_RST)) + if (!(flg & TCP_FLAG_RST) && + !tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDSYNRECV, + &tcp_rsk(req)->last_oow_ack_time)) req->rsk_ops->send_ack(sk, skb, req); if (paws_reject) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); -- cgit From 90bbcc608369a1b46089b0f5aa22b8ea31ffa12e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Apr 2016 16:44:32 -0700 Subject: net: tcp: rename TCP_INC_STATS_BH Rename TCP_INC_STATS_BH() to __TCP_INC_STATS() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_minisocks.c') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4c53e7c86586..0be6bfeab553 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -545,7 +545,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rack.mstamp.v64 = 0; newtp->rack.advanced = 0; - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); } return newsk; } @@ -729,7 +729,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * "fourth, check the SYN bit" */ if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); goto embryonic_reset; } -- cgit From 02a1d6e7a6bb025a77da77012190e1efc1970f1c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Apr 2016 16:44:39 -0700 Subject: net: rename NET_{ADD|INC}_STATS_BH() Rename NET_INC_STATS_BH() to __NET_INC_STATS() and NET_ADD_STATS_BH() to __NET_ADD_STATS() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv4/tcp_minisocks.c') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0be6bfeab553..ffbfecdae471 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -235,7 +235,7 @@ kill: } if (paws_reject) - NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); + __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); if (!th->rst) { /* In this case we must reset the TIMEWAIT timer. @@ -337,7 +337,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) * socket up. We've got bigger problems than * non-graceful socket closings. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); } tcp_update_metrics(sk); @@ -710,7 +710,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, &tcp_rsk(req)->last_oow_ack_time)) req->rsk_ops->send_ack(sk, skb, req); if (paws_reject) - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); return NULL; } @@ -752,7 +752,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); return NULL; } @@ -791,7 +791,7 @@ embryonic_reset: } if (!fastopen) { inet_csk_reqsk_queue_drop(sk, req); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); } return NULL; } -- cgit From c10d9310edf5aa4a676991139d1a43ec7d87e56b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Apr 2016 14:16:47 -0700 Subject: tcp: do not assume TCP code is non preemptible We want to to make TCP stack preemptible, as draining prequeue and backlog queues can take lot of time. Many SNMP updates were assuming that BH (and preemption) was disabled. Need to convert some __NET_INC_STATS() calls to NET_INC_STATS() and some __TCP_INC_STATS() to TCP_INC_STATS() Before using this_cpu_ptr(net->ipv4.tcp_sk) in tcp_v4_send_reset() and tcp_v4_send_ack(), we add an explicit preempt disabled section. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_minisocks.c') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ffbfecdae471..4b95ec4ed2c8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -337,7 +337,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) * socket up. We've got bigger problems than * non-graceful socket closings. */ - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); } tcp_update_metrics(sk); -- cgit