summary | refs | log | tree | commit | diff
path: root/net/netfilter/nf_conntrack_core.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2016-07-27 12:03:20 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-07-27 12:03:20 -0700
commit 468fc7ed5537615efe671d94248446ac24679773 (patch)
tree 27bc9de792e863d6ec1630927b77ac9e7dabb38a /net/netfilter/nf_conntrack_core.c
parent 08fd8c17686c6b09fa410a26d516548dd80ff147 (diff)
parent 36232012344b8db67052432742deaf17f82e70e6 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) Unified UDP encapsulation offload methods for drivers, from Alexander Duyck.
 2) Make DSA binding more sane, from Andrew Lunn.
 3) Support QCA9888 chips in ath10k, from Anilkumar Kolli.
 4) Several workqueue usage cleanups, from Bhaktipriya Shridhar.
 5) Add XDP (eXpress Data Path), essentially running BPF programs on RX packets as soon as the device sees them, with the option to mirror the packet on TX via the same interface. From Brenden Blanco and others.
 6) Allow qdisc/class stats dumps to run lockless, from Eric Dumazet.
 7) Add VLAN support to b53 and bcm_sf2, from Florian Fainelli.
 8) Simplify netlink conntrack entry layout, from Florian Westphal.
 9) Add ipv4 forwarding support to mlxsw spectrum driver, from Ido Schimmel, Yotam Gigi, and Jiri Pirko.
 10) Add SKB array infrastructure and convert tun and macvtap over to it. From Michael S Tsirkin and Jason Wang.
 11) Support qdisc packet injection in pktgen, from John Fastabend.
 12) Add neighbour monitoring framework to TIPC, from Jon Paul Maloy.
 13) Add NV congestion control support to TCP, from Lawrence Brakmo.
 14) Add GSO support to SCTP, from Marcelo Ricardo Leitner.
 15) Allow GRO and RPS to function on macsec devices, from Paolo Abeni.
 16) Support MPLS over IPV4, from Simon Horman.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1622 commits)
  xgene: Fix build warning with ACPI disabled.
  be2net: perform temperature query in adapter regardless of its interface state
  l2tp: Correctly return -EBADF from pppol2tp_getname.
  net/mlx5_core/health: Remove deprecated create_singlethread_workqueue
  net: ipmr/ip6mr: update lastuse on entry change
  macsec: ensure rx_sa is set when validation is disabled
  tipc: dump monitor attributes
  tipc: add a function to get the bearer name
  tipc: get monitor threshold for the cluster
  tipc: make cluster size threshold for monitoring configurable
  tipc: introduce constants for tipc address validation
  net: neigh: disallow transition to NUD_STALE if lladdr is unchanged in neigh_update()
  MAINTAINERS: xgene: Add driver and documentation path
  Documentation: dtb: xgene: Add MDIO node
  dtb: xgene: Add MDIO node
  drivers: net: xgene: ethtool: Use phy_ethtool_gset and sset
  drivers: net: xgene: Use exported functions
  drivers: net: xgene: Enable MDIO driver
  drivers: net: xgene: Add backward compatibility
  drivers: net: phy: xgene: Add MDIO driver
  ...
Diffstat (limited to 'net/netfilter/nf_conntrack_core.c')
-rw-r--r-- net/netfilter/nf_conntrack_core.c 191
1 files changed, 98 insertions, 93 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 4cbda4bd8926..dd2c43abf9e2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -348,16 +348,10 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
tmpl->status = IPS_TEMPLATE;
write_pnet(&tmpl->ct_net, net);
-
- if (nf_ct_zone_add(tmpl, flags, zone) < 0)
- goto out_free;
-
+ nf_ct_zone_add(tmpl, zone);
atomic_set(&tmpl->ct_general.use, 0);
return tmpl;
-out_free:
- kfree(tmpl);
- return NULL;
}
EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
@@ -487,6 +481,23 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
net_eq(net, nf_ct_net(ct));
}
+/* must be called with rcu read lock held */
+void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+{
+ struct hlist_nulls_head *hptr;
+ unsigned int sequence, hsz;
+
+ do {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ hsz = nf_conntrack_htable_size;
+ hptr = nf_conntrack_hash;
+ } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+ *hash = hptr;
+ *hsize = hsz;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
+
/*
* Warning :
* - Caller must take a reference on returned object
@@ -845,67 +856,69 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
/* There's a small race here where we may free a just-assured
connection. Too bad: we're in trouble anyway. */
-static noinline int early_drop(struct net *net, unsigned int _hash)
+static unsigned int early_drop_list(struct net *net,
+ struct hlist_nulls_head *head)
{
- /* Use oldest entry, which is roughly LRU */
struct nf_conntrack_tuple_hash *h;
- struct nf_conn *tmp;
struct hlist_nulls_node *n;
- unsigned int i, hash, sequence;
- struct nf_conn *ct = NULL;
- spinlock_t *lockp;
- bool ret = false;
+ unsigned int drops = 0;
+ struct nf_conn *tmp;
- i = 0;
+ hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
+ tmp = nf_ct_tuplehash_to_ctrack(h);
- local_bh_disable();
-restart:
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- for (; i < NF_CT_EVICTION_RANGE; i++) {
- hash = scale_hash(_hash++);
- lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
- nf_conntrack_lock(lockp);
- if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
- spin_unlock(lockp);
- goto restart;
- }
- hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
- hnnode) {
- tmp = nf_ct_tuplehash_to_ctrack(h);
-
- if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
- !net_eq(nf_ct_net(tmp), net) ||
- nf_ct_is_dying(tmp))
- continue;
-
- if (atomic_inc_not_zero(&tmp->ct_general.use)) {
- ct = tmp;
- break;
- }
- }
+ if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
+ !net_eq(nf_ct_net(tmp), net) ||
+ nf_ct_is_dying(tmp))
+ continue;
- spin_unlock(lockp);
- if (ct)
- break;
+ if (!atomic_inc_not_zero(&tmp->ct_general.use))
+ continue;
+
+ /* kill only if still in same netns -- might have moved due to
+ * SLAB_DESTROY_BY_RCU rules.
+ *
+ * We steal the timer reference. If that fails timer has
+ * already fired or someone else deleted it. Just drop ref
+ * and move to next entry.
+ */
+ if (net_eq(nf_ct_net(tmp), net) &&
+ nf_ct_is_confirmed(tmp) &&
+ del_timer(&tmp->timeout) &&
+ nf_ct_delete(tmp, 0, 0))
+ drops++;
+
+ nf_ct_put(tmp);
}
- local_bh_enable();
+ return drops;
+}
- if (!ct)
- return false;
+static noinline int early_drop(struct net *net, unsigned int _hash)
+{
+ unsigned int i;
- /* kill only if in same netns -- might have moved due to
- * SLAB_DESTROY_BY_RCU rules
- */
- if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) {
- if (nf_ct_delete(ct, 0, 0)) {
- NF_CT_STAT_INC_ATOMIC(net, early_drop);
- ret = true;
+ for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
+ struct hlist_nulls_head *ct_hash;
+ unsigned hash, sequence, drops;
+
+ rcu_read_lock();
+ do {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ hash = scale_hash(_hash++);
+ ct_hash = nf_conntrack_hash;
+ } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+ drops = early_drop_list(net, &ct_hash[hash]);
+ rcu_read_unlock();
+
+ if (drops) {
+ NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops);
+ return true;
}
}
- nf_ct_put(ct);
- return ret;
+ return false;
}
static struct nf_conn *
@@ -951,16 +964,13 @@ __nf_conntrack_alloc(struct net *net,
offsetof(struct nf_conn, proto) -
offsetof(struct nf_conn, __nfct_init_offset[0]));
- if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0)
- goto out_free;
+ nf_ct_zone_add(ct, zone);
/* Because we use RCU lookups, we set ct_general.use to zero before
* this is inserted in any list.
*/
atomic_set(&ct->ct_general.use, 0);
return ct;
-out_free:
- kmem_cache_free(nf_conntrack_cachep, ct);
out:
atomic_dec(&net->ct.count);
return ERR_PTR(-ENOMEM);
@@ -1364,14 +1374,6 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
}
EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
-#ifdef CONFIG_NF_CONNTRACK_ZONES
-static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
- .len = sizeof(struct nf_conntrack_zone),
- .align = __alignof__(struct nf_conntrack_zone),
- .id = NF_CT_EXT_ZONE,
-};
-#endif
-
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
@@ -1554,9 +1556,6 @@ void nf_conntrack_cleanup_end(void)
nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- nf_ct_extend_unregister(&nf_ct_zone_extend);
-#endif
nf_conntrack_proto_fini();
nf_conntrack_seqadj_fini();
nf_conntrack_labels_fini();
@@ -1646,24 +1645,14 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
}
EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
-int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+int nf_conntrack_hash_resize(unsigned int hashsize)
{
- int i, bucket, rc;
- unsigned int hashsize, old_size;
+ int i, bucket;
+ unsigned int old_size;
struct hlist_nulls_head *hash, *old_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
- if (current->nsproxy->net_ns != &init_net)
- return -EOPNOTSUPP;
-
- /* On boot, we can set this without any fancy locking. */
- if (!nf_conntrack_htable_size)
- return param_set_uint(val, kp);
-
- rc = kstrtouint(val, 0, &hashsize);
- if (rc)
- return rc;
if (!hashsize)
return -EINVAL;
@@ -1671,6 +1660,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
if (!hash)
return -ENOMEM;
+ old_size = nf_conntrack_htable_size;
+ if (old_size == hashsize) {
+ nf_ct_free_hashtable(hash, hashsize);
+ return 0;
+ }
+
local_bh_disable();
nf_conntrack_all_lock();
write_seqcount_begin(&nf_conntrack_generation);
@@ -1706,6 +1701,25 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
nf_ct_free_hashtable(old_hash, old_size);
return 0;
}
+
+int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+{
+ unsigned int hashsize;
+ int rc;
+
+ if (current->nsproxy->net_ns != &init_net)
+ return -EOPNOTSUPP;
+
+ /* On boot, we can set this without any fancy locking. */
+ if (!nf_conntrack_htable_size)
+ return param_set_uint(val, kp);
+
+ rc = kstrtouint(val, 0, &hashsize);
+ if (rc)
+ return rc;
+
+ return nf_conntrack_hash_resize(hashsize);
+}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
@@ -1762,7 +1776,7 @@ int nf_conntrack_init_start(void)
nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
sizeof(struct nf_conn), 0,
- SLAB_DESTROY_BY_RCU, NULL);
+ SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
if (!nf_conntrack_cachep)
goto err_cachep;
@@ -1802,11 +1816,6 @@ int nf_conntrack_init_start(void)
if (ret < 0)
goto err_seqadj;
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- ret = nf_ct_extend_register(&nf_ct_zone_extend);
- if (ret < 0)
- goto err_extend;
-#endif
ret = nf_conntrack_proto_init();
if (ret < 0)
goto err_proto;
@@ -1822,10 +1831,6 @@ int nf_conntrack_init_start(void)
return 0;
err_proto:
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- nf_ct_extend_unregister(&nf_ct_zone_extend);
-err_extend:
-#endif
nf_conntrack_seqadj_fini();
err_seqadj:
nf_conntrack_labels_fini();