summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/net/netfilter/nf_conntrack.h3
-rw-r--r--include/net/netfilter/nf_nat.h1
-rw-r--r--net/ipv4/netfilter/arp_tables.c1
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/netfilter/core.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c8
-rw-r--r--net/netfilter/nf_nat_core.c146
-rw-r--r--net/netfilter/xt_hashlimit.c16
9 files changed, 88 insertions, 91 deletions
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index fdc9c64a1c94..8f3bd30511de 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -17,7 +17,6 @@
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/atomic.h>
-#include <linux/rhashtable.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/nf_conntrack_dccp.h>
@@ -77,7 +76,7 @@ struct nf_conn {
possible_net_t ct_net;
#if IS_ENABLED(CONFIG_NF_NAT)
- struct rhlist_head nat_bysource;
+ struct hlist_node nat_bysource;
#endif
/* all members below initialized via memset */
u8 __nfct_init_offset[0];
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 05c82a1a4267..b71701302e61 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -1,6 +1,5 @@
#ifndef _NF_NAT_H
#define _NF_NAT_H
-#include <linux/rhashtable.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/nf_nat.h>
#include <net/netfilter/nf_conntrack_tuple.h>
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e04457198f93..9e2770fd00be 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -629,6 +629,7 @@ static void get_counters(const struct xt_table_info *t,
ADD_COUNTER(counters[i], bcnt, pcnt);
++i;
+ cond_resched();
}
}
}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 576cba2b57e9..39286e543ee6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -776,6 +776,7 @@ get_counters(const struct xt_table_info *t,
ADD_COUNTER(counters[i], bcnt, pcnt);
++i; /* macro does multi eval of i */
+ cond_resched();
}
}
}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 54b1e75eded1..01bd3ee5ebc6 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -795,6 +795,7 @@ get_counters(const struct xt_table_info *t,
ADD_COUNTER(counters[i], bcnt, pcnt);
++i;
+ cond_resched();
}
}
}
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 04fe25abc5f6..52cd2901a097 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -215,7 +215,7 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp)
if (skip == hook_entries)
goto out_assign;
- if (WARN_ON(skip == 0))
+ if (skip == 0)
return NULL;
hook_entries -= skip;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index e1efa446b305..57c8ee66491e 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -24,9 +24,13 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
if (sh) {
sch = skb_header_pointer(skb, iph->len + sizeof(_sctph),
sizeof(_schunkh), &_schunkh);
- if (sch && (sch->type == SCTP_CID_INIT ||
- sysctl_sloppy_sctp(ipvs)))
+ if (sch) {
+ if (sch->type == SCTP_CID_ABORT ||
+ !(sysctl_sloppy_sctp(ipvs) ||
+ sch->type == SCTP_CID_INIT))
+ return 1;
ports = &sh->source;
+ }
}
} else {
ports = skb_header_pointer(
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 40573aa6c133..f393a7086025 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -30,19 +30,17 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
+static spinlock_t nf_nat_locks[CONNTRACK_LOCKS];
+
static DEFINE_MUTEX(nf_nat_proto_mutex);
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
__read_mostly;
static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
__read_mostly;
-struct nf_nat_conn_key {
- const struct net *net;
- const struct nf_conntrack_tuple *tuple;
- const struct nf_conntrack_zone *zone;
-};
-
-static struct rhltable nf_nat_bysource_table;
+static struct hlist_head *nf_nat_bysource __read_mostly;
+static unsigned int nf_nat_htable_size __read_mostly;
+static unsigned int nf_nat_hash_rnd __read_mostly;
inline const struct nf_nat_l3proto *
__nf_nat_l3proto_find(u8 family)
@@ -118,17 +116,19 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
EXPORT_SYMBOL(nf_xfrm_me_harder);
#endif /* CONFIG_XFRM */
-static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed)
+/* We keep an extra hash for each conntrack, for fast searching. */
+static unsigned int
+hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
- const struct nf_conntrack_tuple *t;
- const struct nf_conn *ct = data;
+ unsigned int hash;
+
+ get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
- t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* Original src, to ensure we map it consistently if poss. */
+ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
+ tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
- seed ^= net_hash_mix(nf_ct_net(ct));
- return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32),
- t->dst.protonum ^ seed);
+ return reciprocal_scale(hash, nf_nat_htable_size);
}
/* Is this tuple already taken? (not by us) */
@@ -184,28 +184,6 @@ same_src(const struct nf_conn *ct,
t->src.u.all == tuple->src.u.all);
}
-static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg,
- const void *obj)
-{
- const struct nf_nat_conn_key *key = arg->key;
- const struct nf_conn *ct = obj;
-
- if (!same_src(ct, key->tuple) ||
- !net_eq(nf_ct_net(ct), key->net) ||
- !nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL))
- return 1;
-
- return 0;
-}
-
-static struct rhashtable_params nf_nat_bysource_params = {
- .head_offset = offsetof(struct nf_conn, nat_bysource),
- .obj_hashfn = nf_nat_bysource_hash,
- .obj_cmpfn = nf_nat_bysource_cmp,
- .nelem_hint = 256,
- .min_size = 1024,
-};
-
/* Only called for SRC manip */
static int
find_appropriate_src(struct net *net,
@@ -216,26 +194,22 @@ find_appropriate_src(struct net *net,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
+ unsigned int h = hash_by_src(net, tuple);
const struct nf_conn *ct;
- struct nf_nat_conn_key key = {
- .net = net,
- .tuple = tuple,
- .zone = zone
- };
- struct rhlist_head *hl, *h;
-
- hl = rhltable_lookup(&nf_nat_bysource_table, &key,
- nf_nat_bysource_params);
-
- rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) {
- nf_ct_invert_tuplepr(result,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- result->dst = tuple->dst;
- if (in_range(l3proto, l4proto, result, range))
- return 1;
+ hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
+ if (same_src(ct, tuple) &&
+ net_eq(net, nf_ct_net(ct)) &&
+ nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
+ /* Copy source part from reply tuple. */
+ nf_ct_invert_tuplepr(result,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ result->dst = tuple->dst;
+
+ if (in_range(l3proto, l4proto, result, range))
+ return 1;
+ }
}
-
return 0;
}
@@ -408,6 +382,7 @@ nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_tuple curr_tuple, new_tuple;
/* Can't setup nat info for confirmed ct. */
@@ -416,7 +391,9 @@ nf_nat_setup_info(struct nf_conn *ct,
WARN_ON(maniptype != NF_NAT_MANIP_SRC &&
maniptype != NF_NAT_MANIP_DST);
- BUG_ON(nf_nat_initialized(ct, maniptype));
+
+ if (WARN_ON(nf_nat_initialized(ct, maniptype)))
+ return NF_DROP;
/* What we've got will look like inverse of reply. Normally
* this is what is in the conntrack, except for prior
@@ -447,19 +424,16 @@ nf_nat_setup_info(struct nf_conn *ct,
}
if (maniptype == NF_NAT_MANIP_SRC) {
- struct nf_nat_conn_key key = {
- .net = nf_ct_net(ct),
- .tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- .zone = nf_ct_zone(ct),
- };
- int err;
-
- err = rhltable_insert_key(&nf_nat_bysource_table,
- &key,
- &ct->nat_bysource,
- nf_nat_bysource_params);
- if (err)
- return NF_DROP;
+ unsigned int srchash;
+ spinlock_t *lock;
+
+ srchash = hash_by_src(net,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ lock = &nf_nat_locks[srchash % ARRAY_SIZE(nf_nat_locks)];
+ spin_lock_bh(lock);
+ hlist_add_head_rcu(&ct->nat_bysource,
+ &nf_nat_bysource[srchash]);
+ spin_unlock_bh(lock);
}
/* It's done. */
@@ -553,6 +527,16 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
return i->status & IPS_NAT_MASK ? 1 : 0;
}
+static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
+{
+ unsigned int h;
+
+ h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ spin_lock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]);
+ hlist_del_rcu(&ct->nat_bysource);
+ spin_unlock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]);
+}
+
static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
{
if (nf_nat_proto_remove(ct, data))
@@ -568,8 +552,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
* will delete entry from already-freed table.
*/
clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
- rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ __nf_nat_cleanup_conntrack(ct);
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -698,8 +681,7 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
if (ct->status & IPS_SRC_NAT_DONE)
- rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ __nf_nat_cleanup_conntrack(ct);
}
static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -821,19 +803,27 @@ static struct nf_ct_helper_expectfn follow_master_nat = {
static int __init nf_nat_init(void)
{
- int ret;
+ int ret, i;
- ret = rhltable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
- if (ret)
- return ret;
+ /* Leave them the same for the moment. */
+ nf_nat_htable_size = nf_conntrack_htable_size;
+ if (nf_nat_htable_size < ARRAY_SIZE(nf_nat_locks))
+ nf_nat_htable_size = ARRAY_SIZE(nf_nat_locks);
+
+ nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
+ if (!nf_nat_bysource)
+ return -ENOMEM;
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
- rhltable_destroy(&nf_nat_bysource_table);
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
return ret;
}
+ for (i = 0; i < ARRAY_SIZE(nf_nat_locks); i++)
+ spin_lock_init(&nf_nat_locks[i]);
+
nf_ct_helper_expectfn_register(&follow_master_nat);
BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
@@ -863,8 +853,8 @@ static void __exit nf_nat_cleanup(void)
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
-
- rhltable_destroy(&nf_nat_bysource_table);
+ synchronize_net();
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
}
MODULE_LICENSE("GPL");
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 10d48234f5f4..5da8746f7b88 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -35,6 +35,7 @@
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter/xt_hashlimit.h>
#include <linux/mutex.h>
+#include <linux/kernel.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
@@ -279,7 +280,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
size = cfg->size;
} else {
size = (totalram_pages << PAGE_SHIFT) / 16384 /
- sizeof(struct list_head);
+ sizeof(struct hlist_head);
if (totalram_pages > 1024 * 1024 * 1024 / PAGE_SIZE)
size = 8192;
if (size < 16)
@@ -287,7 +288,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
}
/* FIXME: don't use vmalloc() here or anywhere else -HW */
hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
- sizeof(struct list_head) * size);
+ sizeof(struct hlist_head) * size);
if (hinfo == NULL)
return -ENOMEM;
*out_hinfo = hinfo;
@@ -527,12 +528,12 @@ static u64 user2rate(u64 user)
}
}
-static u64 user2rate_bytes(u64 user)
+static u64 user2rate_bytes(u32 user)
{
u64 r;
- r = user ? 0xFFFFFFFFULL / user : 0xFFFFFFFFULL;
- r = (r - 1) << 4;
+ r = user ? U32_MAX / user : U32_MAX;
+ r = (r - 1) << XT_HASHLIMIT_BYTE_SHIFT;
return r;
}
@@ -588,7 +589,8 @@ static void rateinfo_init(struct dsthash_ent *dh,
dh->rateinfo.prev_window = 0;
dh->rateinfo.current_rate = 0;
if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
- dh->rateinfo.rate = user2rate_bytes(hinfo->cfg.avg);
+ dh->rateinfo.rate =
+ user2rate_bytes((u32)hinfo->cfg.avg);
if (hinfo->cfg.burst)
dh->rateinfo.burst =
hinfo->cfg.burst * dh->rateinfo.rate;
@@ -870,7 +872,7 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
/* Check for overflow. */
if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
- if (cfg->avg == 0) {
+ if (cfg->avg == 0 || cfg->avg > U32_MAX) {
pr_info("hashlimit invalid rate\n");
return -ERANGE;
}