summaryrefslogtreecommitdiff
path: root/net/netfilter/ipvs
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-07-24 22:02:36 -0700
committerDavid S. Miller <davem@davemloft.net>2016-07-24 22:02:36 -0700
commitc42d7121fbee1ee30fd9221d594e9c5a4bc1fed6 (patch)
tree37ccf6ebfc28dc4aad92e9d3831d8825eafb7a5a /net/netfilter/ipvs
parentde0ba9a0d8909996f9e293d311c2cc459fa77d67 (diff)
parent4b512e1c1f8de6b9ceb796ecef8658e0a083cab7 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter/IPVS updates for net-next The following patchset contains Netfilter/IPVS updates for net-next, they are: 1) Count pre-established connections as active in "least connection" schedulers such that pre-established connections to avoid overloading backend servers on peak demands, from Michal Kubecek via Simon Horman. 2) Address a race condition when resizing the conntrack table by caching the bucket size when fulling iterating over the hashtable in these three possible scenarios: 1) dump via /proc/net/nf_conntrack, 2) unlinking userspace helper and 3) unlinking custom conntrack timeout. From Liping Zhang. 3) Revisit early_drop() path to perform lockless traversal on conntrack eviction under stress, use del_timer() as synchronization point to avoid two CPUs evicting the same entry, from Florian Westphal. 4) Move NAT hlist_head to nf_conn object, this simplifies the existing NAT extension and it doesn't increase size since recent patches to align nf_conn, from Florian. 5) Use rhashtable for the by-source NAT hashtable, also from Florian. 6) Don't allow --physdev-is-out from OUTPUT chain, just like --physdev-out is not either, from Hangbin Liu. 7) Automagically set on nf_conntrack counters if the user tries to match ct bytes/packets from nftables, from Liping Zhang. 8) Remove possible_net_t fields in nf_tables set objects since we just simply pass the net pointer to the backend set type implementations. 9) Fix possible off-by-one in h323, from Toby DiPasquale. 10) early_drop() may be called from ctnetlink patch, so we must hold rcu read size lock from them too, this amends Florian's patch #3 coming in this batch, from Liping Zhang. 11) Use binary search to validate jump offset in x_tables, this addresses the O(n!) validation that was introduced recently resolve security issues with unpriviledge namespaces, from Florian. 12) Fix reference leak to connlabel in error path of nft_ct, from Zhang. 13) Three updates for nft_log: Fix log prefix leak in error path. Bail out on loglevel larger than debug in nft_log and set on the new NF_LOG_F_COPY_LEN flag when snaplen is specified. Again from Zhang. 14) Allow to filter rule dumps in nf_tables based on table and chain names. 15) Simplify connlabel to always use 128 bits to store labels and get rid of unused function in xt_connlabel, from Florian. 16) Replace set_expect_timeout() by mod_timer() from the h323 conntrack helper, by Gao Feng. 17) Put back x_tables module reference in nft_compat on error, from Liping Zhang. 18) Add a reference count to the x_tables extensions cache in nft_compat, so we can remove them when unused and avoid a crash if the extensions are rmmod, again from Zhang. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netfilter/ipvs')
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c25
1 files changed, 23 insertions, 2 deletions
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index d7024b2ed769..5117bcb7d2f0 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -395,6 +395,20 @@ static const char *const tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
[IP_VS_TCP_S_LAST] = "BUG!",
};
+static const bool tcp_state_active_table[IP_VS_TCP_S_LAST] = {
+ [IP_VS_TCP_S_NONE] = false,
+ [IP_VS_TCP_S_ESTABLISHED] = true,
+ [IP_VS_TCP_S_SYN_SENT] = true,
+ [IP_VS_TCP_S_SYN_RECV] = true,
+ [IP_VS_TCP_S_FIN_WAIT] = false,
+ [IP_VS_TCP_S_TIME_WAIT] = false,
+ [IP_VS_TCP_S_CLOSE] = false,
+ [IP_VS_TCP_S_CLOSE_WAIT] = false,
+ [IP_VS_TCP_S_LAST_ACK] = false,
+ [IP_VS_TCP_S_LISTEN] = false,
+ [IP_VS_TCP_S_SYNACK] = true,
+};
+
#define sNO IP_VS_TCP_S_NONE
#define sES IP_VS_TCP_S_ESTABLISHED
#define sSS IP_VS_TCP_S_SYN_SENT
@@ -418,6 +432,13 @@ static const char * tcp_state_name(int state)
return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
}
+static bool tcp_state_active(int state)
+{
+ if (state >= IP_VS_TCP_S_LAST)
+ return false;
+ return tcp_state_active_table[state];
+}
+
static struct tcp_states_t tcp_states [] = {
/* INPUT */
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
@@ -540,12 +561,12 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
if (dest) {
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (new_state != IP_VS_TCP_S_ESTABLISHED)) {
+ !tcp_state_active(new_state)) {
atomic_dec(&dest->activeconns);
atomic_inc(&dest->inactconns);
cp->flags |= IP_VS_CONN_F_INACTIVE;
} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (new_state == IP_VS_TCP_S_ESTABLISHED)) {
+ tcp_state_active(new_state)) {
atomic_inc(&dest->activeconns);
atomic_dec(&dest->inactconns);
cp->flags &= ~IP_VS_CONN_F_INACTIVE;