summary | refs | log | tree | commit | diff
path: root/net/netfilter
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2016-09-06 12:45:26 -0700
committer: David S. Miller <davem@davemloft.net> 2016-09-06 12:45:26 -0700
commit: 60175ccdf46ac5076725cb3e66f6bc2e2766ad2b (patch)
tree: a5433388291a0151a00c54db596d2baf8ce649ab /net/netfilter
parent: 2f5281ba2a8feaf6f0aee93356f350855bb530fc (diff)
parent: 779994fa3636d46848edb402fe7517968e036e6f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following patchset contains Netfilter updates for your net-next tree. Most relevant updates are the removal of per-conntrack timers to use a workqueue/garbage collection approach instead, from Florian Westphal, the hash and numgen expressions for nf_tables from Laura Garcia, updates on nf_tables hash set to honor the NLM_F_EXCL flag, removal of ip_conntrack sysctl and many other incremental updates on our Netfilter codebase. More specifically, they are: 1) Retrieve only 4 bytes to fetch ports in case of non-linear skb transport area in dccp, sctp, tcp, udp and udplite protocol conntrackers, from Gao Feng. 2) Missing whitespace on error message in physdev match, from Hangbin Liu. 3) Skip redundant IPv4 checksum calculation in nf_dup_ipv4, from Liping Zhang. 4) Add nf_ct_expires() helper function and use it, from Florian Westphal. 5) Replace opencoded nf_ct_kill() call in IPVS conntrack support, also from Florian. 6) Rename nf_tables set implementation to nft_set_{name}.c 7) Introduce the hash expression to allow arbitrary hashing of selector concatenations, from Laura Garcia Liebana. 8) Remove ip_conntrack sysctl backward compatibility code, this code has been around for a long time already, and we have two interfaces to do this already: nf_conntrack sysctl and ctnetlink. 9) Use nf_conntrack_get_ht() helper function whenever possible, instead of opencoding fetch of hashtable pointer and size, patch from Liping Zhang. 10) Add quota expression for nf_tables. 11) Add number generator expression for nf_tables, this supports incremental and random generators that can be combined with maps, very useful for load balancing purposes, again from Laura Garcia Liebana. 12) Fix a typo in a debug message in FTP conntrack helper, from Colin Ian King. 
13) Introduce a nft_chain_parse_hook() helper function to parse chain hook configuration, this is used by a follow up patch to perform better chain update validation. 14) Add rhashtable_lookup_get_insert_key() to rhashtable and use it from the nft_set_hash implementation to honor the NLM_F_EXCL flag. 15) Missing nulls check in nf_conntrack from nf_conntrack_tuple_taken(), patch from Florian Westphal. 16) Don't use the DYING bit to know if the conntrack event has already been delivered, instead use a state variable to track event re-delivery states, also from Florian. 17) Remove the per-conntrack timer, use the workqueue approach that was discussed during the NFWS, from Florian Westphal. 18) Use the netlink conntrack table dump path to kill stale entries, again from Florian. 19) Add a garbage collector to get rid of stale conntracks, from Florian. 20) Reschedule garbage collector if eviction rate is high. 21) Get rid of the __nf_ct_kill_acct() helper. 22) Use ARPHRD_ETHER instead of hardcoded 1 from ARP logger. 23) Make nf_log_set() interface assertive on unsupported families. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netfilter')
-rw-r--r-- net/netfilter/Kconfig 22
-rw-r--r-- net/netfilter/Makefile 7
-rw-r--r-- net/netfilter/ipvs/ip_vs_nfct.c 7
-rw-r--r-- net/netfilter/nf_conntrack_core.c 229
-rw-r--r-- net/netfilter/nf_conntrack_ecache.c 22
-rw-r--r-- net/netfilter/nf_conntrack_ftp.c 2
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c 44
-rw-r--r-- net/netfilter/nf_conntrack_pptp.c 3
-rw-r--r-- net/netfilter/nf_conntrack_proto.c 81
-rw-r--r-- net/netfilter/nf_conntrack_proto_dccp.c 3
-rw-r--r-- net/netfilter/nf_conntrack_proto_generic.c 39
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c 89
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c 131
-rw-r--r-- net/netfilter/nf_conntrack_proto_udp.c 53
-rw-r--r-- net/netfilter/nf_conntrack_proto_udplite.c 3
-rw-r--r-- net/netfilter/nf_conntrack_standalone.c 3
-rw-r--r-- net/netfilter/nf_log.c 8
-rw-r--r-- net/netfilter/nf_nat_core.c 6
-rw-r--r-- net/netfilter/nf_tables_api.c 203
-rw-r--r-- net/netfilter/nft_hash.c 417
-rw-r--r-- net/netfilter/nft_numgen.c 192
-rw-r--r-- net/netfilter/nft_quota.c 121
-rw-r--r-- net/netfilter/nft_set_hash.c 404
-rw-r--r-- net/netfilter/nft_set_rbtree.c (renamed from net/netfilter/nft_rbtree.c) 12
-rw-r--r-- net/netfilter/xt_conntrack.c 4
-rw-r--r-- net/netfilter/xt_physdev.c 4
26 files changed, 1186 insertions, 923 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9266ceebd112..e8d56d9a4df2 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -474,6 +474,12 @@ config NFT_META
This option adds the "meta" expression that you can use to match and
to set packet metainformation such as the packet mark.
+config NFT_NUMGEN
+ tristate "Netfilter nf_tables number generator module"
+ help
+ This option adds the number generator expression used to perform
+ incremental counting and random numbers bound to a upper limit.
+
config NFT_CT
depends on NF_CONNTRACK
tristate "Netfilter nf_tables conntrack module"
@@ -481,13 +487,13 @@ config NFT_CT
This option adds the "meta" expression that you can use to match
connection tracking information such as the flow state.
-config NFT_RBTREE
+config NFT_SET_RBTREE
tristate "Netfilter nf_tables rbtree set module"
help
This option adds the "rbtree" set type (Red Black tree) that is used
to build interval-based sets.
-config NFT_HASH
+config NFT_SET_HASH
tristate "Netfilter nf_tables hash set module"
help
This option adds the "hash" set type that is used to build one-way
@@ -542,6 +548,12 @@ config NFT_QUEUE
This is required if you intend to use the userspace queueing
infrastructure (also known as NFQUEUE) from nftables.
+config NFT_QUOTA
+ tristate "Netfilter nf_tables quota module"
+ help
+ This option adds the "quota" expression that you can use to match
+ enforce bytes quotas.
+
config NFT_REJECT
default m if NETFILTER_ADVANCED=n
tristate "Netfilter nf_tables reject support"
@@ -563,6 +575,12 @@ config NFT_COMPAT
x_tables match/target extensions over the nf_tables
framework.
+config NFT_HASH
+ tristate "Netfilter nf_tables hash module"
+ help
+ This option adds the "hash" expression that you can use to perform
+ a hash operation on registers.
+
if NF_TABLES_NETDEV
config NF_DUP_NETDEV
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 69134541d65b..0c8581100ac6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -80,18 +80,21 @@ obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
obj-$(CONFIG_NFT_META) += nft_meta.o
+obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
obj-$(CONFIG_NFT_CT) += nft_ct.o
obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
obj-$(CONFIG_NFT_NAT) += nft_nat.o
obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
+obj-$(CONFIG_NFT_QUOTA) += nft_quota.o
obj-$(CONFIG_NFT_REJECT) += nft_reject.o
obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
-obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o
-obj-$(CONFIG_NFT_HASH) += nft_hash.o
+obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o
+obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
obj-$(CONFIG_NFT_REDIR) += nft_redir.o
+obj-$(CONFIG_NFT_HASH) += nft_hash.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index f04fd8df210b..fc230d99aa3b 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -281,13 +281,10 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
- /* Show what happens instead of calling nf_ct_kill() */
- if (del_timer(&ct->timeout)) {
- IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
+ if (nf_ct_kill(ct)) {
+ IP_VS_DBG(7, "%s: ct=%p, deleted conntrack for tuple="
FMT_TUPLE "\n",
__func__, ct, ARG_TUPLE(&tuple));
- if (ct->timeout.function)
- ct->timeout.function(ct->timeout.data);
} else {
IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
FMT_TUPLE "\n",
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index dd2c43abf9e2..ac1db4019d5c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -72,12 +72,24 @@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash);
+struct conntrack_gc_work {
+ struct delayed_work dwork;
+ u32 last_bucket;
+ bool exiting;
+};
+
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
-static __read_mostly seqcount_t nf_conntrack_generation;
static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
+#define GC_MAX_BUCKETS_DIV 64u
+#define GC_MAX_BUCKETS 8192u
+#define GC_INTERVAL (5 * HZ)
+#define GC_MAX_EVICTS 256u
+
+static struct conntrack_gc_work conntrack_gc_work;
+
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
spin_lock(lock);
@@ -164,7 +176,7 @@ unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_max);
+seqcount_t nf_conntrack_generation __read_mostly;
DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
@@ -372,7 +384,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
pr_debug("destroy_conntrack(%p)\n", ct);
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
- NF_CT_ASSERT(!timer_pending(&ct->timeout));
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
@@ -435,35 +446,30 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
struct nf_conn_tstamp *tstamp;
+ if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
+ return false;
+
tstamp = nf_conn_tstamp_find(ct);
if (tstamp && tstamp->stop == 0)
tstamp->stop = ktime_get_real_ns();
- if (nf_ct_is_dying(ct))
- goto delete;
-
if (nf_conntrack_event_report(IPCT_DESTROY, ct,
portid, report) < 0) {
- /* destroy event was not delivered */
+ /* destroy event was not delivered. nf_ct_put will
+ * be done by event cache worker on redelivery.
+ */
nf_ct_delete_from_lists(ct);
nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
return false;
}
nf_conntrack_ecache_work(nf_ct_net(ct));
- set_bit(IPS_DYING_BIT, &ct->status);
- delete:
nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
return true;
}
EXPORT_SYMBOL_GPL(nf_ct_delete);
-static void death_by_timeout(unsigned long ul_conntrack)
-{
- nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
-}
-
static inline bool
nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
const struct nf_conntrack_tuple *tuple,
@@ -481,22 +487,17 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
net_eq(net, nf_ct_net(ct));
}
-/* must be called with rcu read lock held */
-void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+/* caller must hold rcu readlock and none of the nf_conntrack_locks */
+static void nf_ct_gc_expired(struct nf_conn *ct)
{
- struct hlist_nulls_head *hptr;
- unsigned int sequence, hsz;
+ if (!atomic_inc_not_zero(&ct->ct_general.use))
+ return;
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hsz = nf_conntrack_htable_size;
- hptr = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ if (nf_ct_should_gc(ct))
+ nf_ct_kill(ct);
- *hash = hptr;
- *hsize = hsz;
+ nf_ct_put(ct);
}
-EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
/*
* Warning :
@@ -510,16 +511,24 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n;
- unsigned int bucket, sequence;
+ unsigned int bucket, hsize;
begin:
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- bucket = scale_hash(hash);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ bucket = reciprocal_scale(hash, hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
+ struct nf_conn *ct;
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(ct)) {
+ nf_ct_gc_expired(ct);
+ continue;
+ }
+
+ if (nf_ct_is_dying(ct))
+ continue;
+
if (nf_ct_key_equal(h, tuple, zone, net)) {
NF_CT_STAT_INC_ATOMIC(net, found);
return h;
@@ -618,7 +627,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
zone, net))
goto out;
- add_timer(&ct->timeout);
smp_wmb();
/* The caller holds a reference to this object */
atomic_set(&ct->ct_general.use, 2);
@@ -771,8 +779,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
weird delay cases. */
- ct->timeout.expires += jiffies;
- add_timer(&ct->timeout);
+ ct->timeout += nfct_time_stamp;
atomic_inc(&ct->ct_general.use);
ct->status |= IPS_CONFIRMED;
@@ -823,29 +830,41 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
- unsigned int hash, sequence;
+ unsigned int hash, hsize;
struct hlist_nulls_node *n;
struct nf_conn *ct;
zone = nf_ct_zone(ignored_conntrack);
rcu_read_lock();
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hash = hash_conntrack(net, tuple);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ begin:
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ hash = __hash_conntrack(net, tuple, hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
- if (ct != ignored_conntrack &&
- nf_ct_key_equal(h, tuple, zone, net)) {
+
+ if (ct == ignored_conntrack)
+ continue;
+
+ if (nf_ct_is_expired(ct)) {
+ nf_ct_gc_expired(ct);
+ continue;
+ }
+
+ if (nf_ct_key_equal(h, tuple, zone, net)) {
NF_CT_STAT_INC_ATOMIC(net, found);
rcu_read_unlock();
return 1;
}
NF_CT_STAT_INC_ATOMIC(net, searched);
}
+
+ if (get_nulls_value(n) != hash) {
+ NF_CT_STAT_INC_ATOMIC(net, search_restart);
+ goto begin;
+ }
+
rcu_read_unlock();
return 0;
@@ -867,6 +886,11 @@ static unsigned int early_drop_list(struct net *net,
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+ continue;
+ }
+
if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
!net_eq(nf_ct_net(tmp), net) ||
nf_ct_is_dying(tmp))
@@ -884,7 +908,6 @@ static unsigned int early_drop_list(struct net *net,
*/
if (net_eq(nf_ct_net(tmp), net) &&
nf_ct_is_confirmed(tmp) &&
- del_timer(&tmp->timeout) &&
nf_ct_delete(tmp, 0, 0))
drops++;
@@ -900,14 +923,11 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
struct hlist_nulls_head *ct_hash;
- unsigned hash, sequence, drops;
+ unsigned int hash, hsize, drops;
rcu_read_lock();
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hash = scale_hash(_hash++);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ hash = reciprocal_scale(_hash++, hsize);
drops = early_drop_list(net, &ct_hash[hash]);
rcu_read_unlock();
@@ -921,6 +941,69 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
return false;
}
+static void gc_worker(struct work_struct *work)
+{
+ unsigned int i, goal, buckets = 0, expired_count = 0;
+ unsigned long next_run = GC_INTERVAL;
+ unsigned int ratio, scanned = 0;
+ struct conntrack_gc_work *gc_work;
+
+ gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
+
+ goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+ i = gc_work->last_bucket;
+
+ do {
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_head *ct_hash;
+ struct hlist_nulls_node *n;
+ unsigned int hashsz;
+ struct nf_conn *tmp;
+
+ i++;
+ rcu_read_lock();
+
+ nf_conntrack_get_ht(&ct_hash, &hashsz);
+ if (i >= hashsz)
+ i = 0;
+
+ hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+ tmp = nf_ct_tuplehash_to_ctrack(h);
+
+ scanned++;
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+ expired_count++;
+ continue;
+ }
+ }
+
+ /* could check get_nulls_value() here and restart if ct
+ * was moved to another chain. But given gc is best-effort
+ * we will just continue with next hash slot.
+ */
+ rcu_read_unlock();
+ cond_resched_rcu_qs();
+ } while (++buckets < goal &&
+ expired_count < GC_MAX_EVICTS);
+
+ if (gc_work->exiting)
+ return;
+
+ ratio = scanned ? expired_count * 100 / scanned : 0;
+ if (ratio >= 90)
+ next_run = 0;
+
+ gc_work->last_bucket = i;
+ schedule_delayed_work(&gc_work->dwork, next_run);
+}
+
+static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
+{
+ INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ gc_work->exiting = false;
+}
+
static struct nf_conn *
__nf_conntrack_alloc(struct net *net,
const struct nf_conntrack_zone *zone,
@@ -957,8 +1040,6 @@ __nf_conntrack_alloc(struct net *net,
/* save hash for reusing when confirming */
*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
ct->status = 0;
- /* Don't set timer yet: wait for confirmation */
- setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
write_pnet(&ct->ct_net, net);
memset(&ct->__nfct_init_offset[0], 0,
offsetof(struct nf_conn, proto) -
@@ -1332,7 +1413,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
unsigned long extra_jiffies,
int do_acct)
{
- NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
NF_CT_ASSERT(skb);
/* Only update if this is not a fixed timeout */
@@ -1340,39 +1420,25 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
goto acct;
/* If not in hash table, timer will not be active yet */
- if (!nf_ct_is_confirmed(ct)) {
- ct->timeout.expires = extra_jiffies;
- } else {
- unsigned long newtime = jiffies + extra_jiffies;
-
- /* Only update the timeout if the new timeout is at least
- HZ jiffies from the old timeout. Need del_timer for race
- avoidance (may already be dying). */
- if (newtime - ct->timeout.expires >= HZ)
- mod_timer_pending(&ct->timeout, newtime);
- }
+ if (nf_ct_is_confirmed(ct))
+ extra_jiffies += nfct_time_stamp;
+ ct->timeout = extra_jiffies;
acct:
if (do_acct)
nf_ct_acct_update(ct, ctinfo, skb->len);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
-bool __nf_ct_kill_acct(struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb,
- int do_acct)
+bool nf_ct_kill_acct(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb)
{
- if (do_acct)
- nf_ct_acct_update(ct, ctinfo, skb->len);
+ nf_ct_acct_update(ct, ctinfo, skb->len);
- if (del_timer(&ct->timeout)) {
- ct->timeout.function((unsigned long)ct);
- return true;
- }
- return false;
+ return nf_ct_delete(ct, 0, 0);
}
-EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
+EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -1505,11 +1571,8 @@ void nf_ct_iterate_cleanup(struct net *net,
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
- if (del_timer(&ct->timeout))
- nf_ct_delete(ct, portid, report);
-
- /* ... else the timer will get him soon. */
+ nf_ct_delete(ct, portid, report);
nf_ct_put(ct);
cond_resched();
}
@@ -1545,6 +1608,7 @@ static int untrack_refs(void)
void nf_conntrack_cleanup_start(void)
{
+ conntrack_gc_work.exiting = true;
RCU_INIT_POINTER(ip_ct_attach, NULL);
}
@@ -1554,6 +1618,7 @@ void nf_conntrack_cleanup_end(void)
while (untrack_refs() > 0)
schedule();
+ cancel_delayed_work_sync(&conntrack_gc_work.dwork);
nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
nf_conntrack_proto_fini();
@@ -1828,6 +1893,10 @@ int nf_conntrack_init_start(void)
}
/* - and look it like as a confirmed connection */
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
+
+ conntrack_gc_work_init(&conntrack_gc_work);
+ schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+
return 0;
err_proto:
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index d28011b42845..da9df2d56e66 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -49,8 +49,13 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+ struct nf_conntrack_ecache *e;
- if (nf_ct_is_dying(ct))
+ if (!nf_ct_is_confirmed(ct))
+ continue;
+
+ e = nf_ct_ecache_find(ct);
+ if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
continue;
if (nf_conntrack_event(IPCT_DESTROY, ct)) {
@@ -58,8 +63,7 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
break;
}
- /* we've got the event delivered, now it's dying */
- set_bit(IPS_DYING_BIT, &ct->status);
+ e->state = NFCT_ECACHE_DESTROY_SENT;
refs[evicted] = ct;
if (++evicted >= ARRAY_SIZE(refs)) {
@@ -130,7 +134,7 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
if (!e)
goto out_unlock;
- if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
+ if (nf_ct_is_confirmed(ct)) {
struct nf_ct_event item = {
.ct = ct,
.portid = e->portid ? e->portid : portid,
@@ -150,11 +154,13 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
* triggered by a process, we store the PORTID
* to include it in the retransmission.
*/
- if (eventmask & (1 << IPCT_DESTROY) &&
- e->portid == 0 && portid != 0)
- e->portid = portid;
- else
+ if (eventmask & (1 << IPCT_DESTROY)) {
+ if (e->portid == 0 && portid != 0)
+ e->portid = portid;
+ e->state = NFCT_ECACHE_DESTROY_FAIL;
+ } else {
e->missed |= eventmask;
+ }
} else {
e->missed &= ~missed;
}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 43147005bea3..b6934b5edf7a 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -237,7 +237,7 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
}
delim = data[0];
if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) {
- pr_debug("try_eprt: invalid delimitter.\n");
+ pr_debug("try_eprt: invalid delimiter.\n");
return 0;
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index fdfc71f416b7..c052b712c49f 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -149,10 +149,7 @@ nla_put_failure:
static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
{
- long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ;
-
- if (timeout < 0)
- timeout = 0;
+ long timeout = nf_ct_expires(ct) / HZ;
if (nla_put_be32(skb, CTA_TIMEOUT, htonl(timeout)))
goto nla_put_failure;
@@ -818,14 +815,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
struct hlist_nulls_node *n;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
- int res;
+ struct nf_conn *nf_ct_evict[8];
+ int res, i;
spinlock_t *lockp;
last = (struct nf_conn *)cb->args[1];
+ i = 0;
local_bh_disable();
for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
restart:
+ while (i) {
+ i--;
+ if (nf_ct_should_gc(nf_ct_evict[i]))
+ nf_ct_kill(nf_ct_evict[i]);
+ nf_ct_put(nf_ct_evict[i]);
+ }
+
lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
nf_conntrack_lock(lockp);
if (cb->args[0] >= nf_conntrack_htable_size) {
@@ -837,6 +843,13 @@ restart:
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue;
ct = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(ct)) {
+ if (i < ARRAY_SIZE(nf_ct_evict) &&
+ atomic_inc_not_zero(&ct->ct_general.use))
+ nf_ct_evict[i++] = ct;
+ continue;
+ }
+
if (!net_eq(net, nf_ct_net(ct)))
continue;
@@ -878,6 +891,13 @@ out:
if (last)
nf_ct_put(last);
+ while (i) {
+ i--;
+ if (nf_ct_should_gc(nf_ct_evict[i]))
+ nf_ct_kill(nf_ct_evict[i]);
+ nf_ct_put(nf_ct_evict[i]);
+ }
+
return skb->len;
}
@@ -1147,9 +1167,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
}
}
- if (del_timer(&ct->timeout))
- nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
-
+ nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
nf_ct_put(ct);
return 0;
@@ -1517,11 +1535,10 @@ static int ctnetlink_change_timeout(struct nf_conn *ct,
{
u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
- if (!del_timer(&ct->timeout))
- return -ETIME;
+ ct->timeout = nfct_time_stamp + timeout * HZ;
- ct->timeout.expires = jiffies + timeout * HZ;
- add_timer(&ct->timeout);
+ if (test_bit(IPS_DYING_BIT, &ct->status))
+ return -ETIME;
return 0;
}
@@ -1719,9 +1736,8 @@ ctnetlink_create_conntrack(struct net *net,
if (!cda[CTA_TIMEOUT])
goto err1;
- ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
- ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
+ ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
rcu_read_lock();
if (cda[CTA_HELP]) {
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 5588c7ae1ac2..f60a4755d71e 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -157,8 +157,7 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
pr_debug("setting timeout of conntrack %p to 0\n", sibling);
sibling->proto.gre.timeout = 0;
sibling->proto.gre.stream_timeout = 0;
- if (del_timer(&sibling->timeout))
- sibling->timeout.function((unsigned long)sibling);
+ nf_ct_kill(sibling);
nf_ct_put(sibling);
return 1;
} else {
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index b65d5864b6d9..8d2c7d8c666a 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -159,54 +159,6 @@ static int kill_l4proto(struct nf_conn *i, void *data)
nf_ct_l3num(i) == l4proto->l3proto;
}
-static struct nf_ip_net *nf_ct_l3proto_net(struct net *net,
- struct nf_conntrack_l3proto *l3proto)
-{
- if (l3proto->l3proto == PF_INET)
- return &net->ct.nf_ct_proto;
- else
- return NULL;
-}
-
-static int nf_ct_l3proto_register_sysctl(struct net *net,
- struct nf_conntrack_l3proto *l3proto)
-{
- int err = 0;
- struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto);
- /* nf_conntrack_l3proto_ipv6 doesn't support sysctl */
- if (in == NULL)
- return 0;
-
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- if (in->ctl_table != NULL) {
- err = nf_ct_register_sysctl(net,
- &in->ctl_table_header,
- l3proto->ctl_table_path,
- in->ctl_table);
- if (err < 0) {
- kfree(in->ctl_table);
- in->ctl_table = NULL;
- }
- }
-#endif
- return err;
-}
-
-static void nf_ct_l3proto_unregister_sysctl(struct net *net,
- struct nf_conntrack_l3proto *l3proto)
-{
- struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto);
-
- if (in == NULL)
- return;
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- if (in->ctl_table_header != NULL)
- nf_ct_unregister_sysctl(&in->ctl_table_header,
- &in->ctl_table,
- 0);
-#endif
-}
-
int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
{
int ret = 0;
@@ -241,7 +193,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
int nf_ct_l3proto_pernet_register(struct net *net,
struct nf_conntrack_l3proto *proto)
{
- int ret = 0;
+ int ret;
if (proto->init_net) {
ret = proto->init_net(net);
@@ -249,7 +201,7 @@ int nf_ct_l3proto_pernet_register(struct net *net,
return ret;
}
- return nf_ct_l3proto_register_sysctl(net, proto);
+ return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
@@ -272,8 +224,6 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
void nf_ct_l3proto_pernet_unregister(struct net *net,
struct nf_conntrack_l3proto *proto)
{
- nf_ct_l3proto_unregister_sysctl(net, proto);
-
/* Remove all contrack entries for this protocol */
nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
}
@@ -312,26 +262,6 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
}
}
}
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_table != NULL) {
- if (err < 0) {
- nf_ct_kfree_compat_sysctl_table(pn);
- goto out;
- }
- err = nf_ct_register_sysctl(net,
- &pn->ctl_compat_header,
- "net/ipv4/netfilter",
- pn->ctl_compat_table);
- if (err == 0)
- goto out;
-
- nf_ct_kfree_compat_sysctl_table(pn);
- nf_ct_unregister_sysctl(&pn->ctl_table_header,
- &pn->ctl_table,
- pn->users);
- }
-out:
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
return err;
}
@@ -346,13 +276,6 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net,
nf_ct_unregister_sysctl(&pn->ctl_table_header,
&pn->ctl_table,
pn->users);
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_header != NULL)
- nf_ct_unregister_sysctl(&pn->ctl_compat_header,
- &pn->ctl_compat_table,
- 0);
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
}
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 399a38fd685a..a45bee52dccc 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -402,7 +402,8 @@ static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
{
struct dccp_hdr _hdr, *dh;
- dh = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ dh = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (dh == NULL)
return false;
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 86dc752e5349..d5868bad33a7 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -151,17 +151,6 @@ static struct ctl_table generic_sysctl_table[] = {
},
{ }
};
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table generic_compat_sysctl_table[] = {
- {
- .procname = "ip_conntrack_generic_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -179,40 +168,14 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int generic_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
- struct nf_generic_net *gn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- pn->ctl_compat_table = kmemdup(generic_compat_sysctl_table,
- sizeof(generic_compat_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_compat_table)
- return -ENOMEM;
-
- pn->ctl_compat_table[0].data = &gn->timeout;
-#endif
-#endif
- return 0;
-}
-
static int generic_init_net(struct net *net, u_int16_t proto)
{
- int ret;
struct nf_generic_net *gn = generic_pernet(net);
struct nf_proto_net *pn = &gn->pn;
gn->timeout = nf_ct_generic_timeout;
- ret = generic_kmemdup_compat_sysctl_table(pn, gn);
- if (ret < 0)
- return ret;
-
- ret = generic_kmemdup_sysctl_table(pn, gn);
- if (ret < 0)
- nf_ct_kfree_compat_sysctl_table(pn);
-
- return ret;
+ return generic_kmemdup_sysctl_table(pn, gn);
}
static struct nf_proto_net *generic_get_net_proto(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 1d7ab960a9e6..982ea62606c7 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -161,8 +161,8 @@ static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
const struct sctphdr *hp;
struct sctphdr _hdr;
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
@@ -705,54 +705,6 @@ static struct ctl_table sctp_sysctl_table[] = {
},
{ }
};
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table sctp_compat_sysctl_table[] = {
- {
- .procname = "ip_conntrack_sctp_timeout_closed",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_cookie_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_cookie_echoed",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_established",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_shutdown_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_shutdown_recd",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif
static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -781,32 +733,8 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
- struct sctp_net *sn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- pn->ctl_compat_table = kmemdup(sctp_compat_sysctl_table,
- sizeof(sctp_compat_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_compat_table)
- return -ENOMEM;
-
- pn->ctl_compat_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED];
- pn->ctl_compat_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT];
- pn->ctl_compat_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED];
- pn->ctl_compat_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED];
- pn->ctl_compat_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT];
- pn->ctl_compat_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD];
- pn->ctl_compat_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT];
-#endif
-#endif
- return 0;
-}
-
static int sctp_init_net(struct net *net, u_int16_t proto)
{
- int ret;
struct sctp_net *sn = sctp_pernet(net);
struct nf_proto_net *pn = &sn->pn;
@@ -817,18 +745,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto)
sn->timeouts[i] = sctp_timeouts[i];
}
- if (proto == AF_INET) {
- ret = sctp_kmemdup_compat_sysctl_table(pn, sn);
- if (ret < 0)
- return ret;
-
- ret = sctp_kmemdup_sysctl_table(pn, sn);
- if (ret < 0)
- nf_ct_kfree_compat_sysctl_table(pn);
- } else
- ret = sctp_kmemdup_sysctl_table(pn, sn);
-
- return ret;
+ return sctp_kmemdup_sysctl_table(pn, sn);
}
static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 70c8381641a7..69f687740c76 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -282,8 +282,8 @@ static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
const struct tcphdr *hp;
struct tcphdr _hdr;
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
@@ -1481,90 +1481,6 @@ static struct ctl_table tcp_sysctl_table[] = {
},
{ }
};
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table tcp_compat_sysctl_table[] = {
- {
- .procname = "ip_conntrack_tcp_timeout_syn_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_syn_sent2",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_syn_recv",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_established",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_fin_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_close_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_last_ack",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_time_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_close",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_max_retrans",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_loose",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "ip_conntrack_tcp_be_liberal",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "ip_conntrack_tcp_max_retrans",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -1597,38 +1513,8 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
- struct nf_tcp_net *tn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
- sizeof(tcp_compat_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_compat_table)
- return -ENOMEM;
-
- pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
- pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
- pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
- pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
- pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
- pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
- pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
- pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
- pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
- pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
- pn->ctl_compat_table[10].data = &tn->tcp_loose;
- pn->ctl_compat_table[11].data = &tn->tcp_be_liberal;
- pn->ctl_compat_table[12].data = &tn->tcp_max_retrans;
-#endif
-#endif
- return 0;
-}
-
static int tcp_init_net(struct net *net, u_int16_t proto)
{
- int ret;
struct nf_tcp_net *tn = tcp_pernet(net);
struct nf_proto_net *pn = &tn->pn;
@@ -1643,18 +1529,7 @@ static int tcp_init_net(struct net *net, u_int16_t proto)
tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
}
- if (proto == AF_INET) {
- ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
- if (ret < 0)
- return ret;
-
- ret = tcp_kmemdup_sysctl_table(pn, tn);
- if (ret < 0)
- nf_ct_kfree_compat_sysctl_table(pn);
- } else
- ret = tcp_kmemdup_sysctl_table(pn, tn);
-
- return ret;
+ return tcp_kmemdup_sysctl_table(pn, tn);
}
static struct nf_proto_net *tcp_get_net_proto(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 4fd040575ffe..20f35ed68030 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -44,8 +44,8 @@ static bool udp_pkt_to_tuple(const struct sk_buff *skb,
const struct udphdr *hp;
struct udphdr _hdr;
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
@@ -218,23 +218,6 @@ static struct ctl_table udp_sysctl_table[] = {
},
{ }
};
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table udp_compat_sysctl_table[] = {
- {
- .procname = "ip_conntrack_udp_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_udp_timeout_stream",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -254,27 +237,8 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
- struct nf_udp_net *un)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- pn->ctl_compat_table = kmemdup(udp_compat_sysctl_table,
- sizeof(udp_compat_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_compat_table)
- return -ENOMEM;
-
- pn->ctl_compat_table[0].data = &un->timeouts[UDP_CT_UNREPLIED];
- pn->ctl_compat_table[1].data = &un->timeouts[UDP_CT_REPLIED];
-#endif
-#endif
- return 0;
-}
-
static int udp_init_net(struct net *net, u_int16_t proto)
{
- int ret;
struct nf_udp_net *un = udp_pernet(net);
struct nf_proto_net *pn = &un->pn;
@@ -285,18 +249,7 @@ static int udp_init_net(struct net *net, u_int16_t proto)
un->timeouts[i] = udp_timeouts[i];
}
- if (proto == AF_INET) {
- ret = udp_kmemdup_compat_sysctl_table(pn, un);
- if (ret < 0)
- return ret;
-
- ret = udp_kmemdup_sysctl_table(pn, un);
- if (ret < 0)
- nf_ct_kfree_compat_sysctl_table(pn);
- } else
- ret = udp_kmemdup_sysctl_table(pn, un);
-
- return ret;
+ return udp_kmemdup_sysctl_table(pn, un);
}
static struct nf_proto_net *udp_get_net_proto(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 9d692f5adb94..029206e8dec4 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -54,7 +54,8 @@ static bool udplite_pkt_to_tuple(const struct sk_buff *skb,
const struct udphdr *hp;
struct udphdr _hdr;
- hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9f267c3ffb39..3d9a316a3c77 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -228,8 +228,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
seq_printf(s, "%-8s %u %-8s %u %ld ",
l3proto->name, nf_ct_l3num(ct),
l4proto->name, nf_ct_protonum(ct),
- timer_pending(&ct->timeout)
- ? (long)(ct->timeout.expires - jiffies)/HZ : 0);
+ nf_ct_expires(ct) / HZ);
if (l4proto->print_conntrack)
l4proto->print_conntrack(s, ct);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index aa5847a16713..30a17d649a83 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -39,12 +39,12 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
return NULL;
}
-void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
+int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
{
const struct nf_logger *log;
- if (pf == NFPROTO_UNSPEC)
- return;
+ if (pf == NFPROTO_UNSPEC || pf >= ARRAY_SIZE(net->nf.nf_loggers))
+ return -EOPNOTSUPP;
mutex_lock(&nf_log_mutex);
log = nft_log_dereference(net->nf.nf_loggers[pf]);
@@ -52,6 +52,8 @@ void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
mutex_unlock(&nf_log_mutex);
+
+ return 0;
}
EXPORT_SYMBOL(nf_log_set);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index de31818417b8..81ae41f85d3a 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -565,16 +565,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
* Else, when the conntrack is destroyed, nf_nat_cleanup_conntrack()
* will delete entry from already-freed table.
*/
- if (!del_timer(&ct->timeout))
- return 1;
-
ct->status &= ~IPS_NAT_DONE_MASK;
-
rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
nf_nat_bysource_params);
- add_timer(&ct->timeout);
-
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
*/
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7e1c876c7608..bd9715e5ff26 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1196,6 +1196,83 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
}
}
+struct nft_chain_hook { /* parsed NFTA_CHAIN_HOOK description, filled in by nft_chain_parse_hook() */
+ u32 num; /* hook number from NFTA_HOOK_HOOKNUM, checked against afi->nhooks */
+ u32 priority; /* from NFTA_HOOK_PRIORITY; NOTE(review): unsigned here - confirm negative priorities round-trip as intended */
+ const struct nf_chain_type *type; /* chain type; a module reference on type->owner is held until nft_chain_release_hook() */
+ struct net_device *dev; /* device named by NFTA_HOOK_DEV, or NULL when the family does not set NFT_AF_NEEDS_DEV */
+};
+
+/* Parse the nested NFTA_CHAIN_HOOK attribute into @hook.
+ *
+ * Both the hook number and the priority are mandatory, and the hook number
+ * must be below afi->nhooks. The chain type is the one named by
+ * NFTA_CHAIN_TYPE when present, otherwise the family default, and it must
+ * support the requested hook. NFTA_HOOK_DEV is required for families with
+ * NFT_AF_NEEDS_DEV and rejected for all others.
+ *
+ * Returns 0 on success; the caller then owns what was acquired here and
+ * gives it back with nft_chain_release_hook(). Returns a negative errno on
+ * failure, in which case nothing is left held.
+ */
+static int nft_chain_parse_hook(struct net *net,
+				const struct nlattr * const nla[],
+				struct nft_af_info *afi,
+				struct nft_chain_hook *hook, bool create)
+{
+	struct nlattr *ha[NFTA_HOOK_MAX + 1];
+	const struct nf_chain_type *type;
+	char ifname[IFNAMSIZ];
+	int err;
+
+	err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
+			       nft_hook_policy);
+	if (err < 0)
+		return err;
+
+	if (!ha[NFTA_HOOK_HOOKNUM] || !ha[NFTA_HOOK_PRIORITY])
+		return -EINVAL;
+
+	hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+	if (hook->num >= afi->nhooks)
+		return -EINVAL;
+
+	hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+
+	if (nla[NFTA_CHAIN_TYPE]) {
+		type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE],
+						   create);
+		if (IS_ERR(type))
+			return PTR_ERR(type);
+	} else {
+		type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
+	}
+
+	if (!(type->hook_mask & (1 << hook->num)))
+		return -EOPNOTSUPP;
+	if (!try_module_get(type->owner))
+		return -ENOENT;
+
+	/* From here on every failure has to drop the module reference. */
+	hook->type = type;
+	hook->dev = NULL;
+
+	if (!(afi->flags & NFT_AF_NEEDS_DEV)) {
+		if (ha[NFTA_HOOK_DEV]) {
+			err = -EOPNOTSUPP;
+			goto err_module_put;
+		}
+		return 0;
+	}
+
+	if (!ha[NFTA_HOOK_DEV]) {
+		err = -EOPNOTSUPP;
+		goto err_module_put;
+	}
+
+	nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
+	hook->dev = dev_get_by_name(net, ifname);
+	if (!hook->dev) {
+		err = -ENOENT;
+		goto err_module_put;
+	}
+
+	return 0;
+
+err_module_put:
+	module_put(type->owner);
+	return err;
+}
+
+/* Give back what a successful nft_chain_parse_hook() left in @hook: the
+ * reference on the chain type's module and, when a device was looked up,
+ * the device itself.
+ */
+static void nft_chain_release_hook(struct nft_chain_hook *hook)
+{
+	struct net_device *dev = hook->dev;
+
+	module_put(hook->type->owner);
+	if (dev)
+		dev_put(dev);
+}
+
static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -1206,10 +1283,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct nft_table *table;
struct nft_chain *chain;
struct nft_base_chain *basechain = NULL;
- struct nlattr *ha[NFTA_HOOK_MAX + 1];
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
- struct net_device *dev = NULL;
u8 policy = NF_ACCEPT;
u64 handle = 0;
unsigned int i;
@@ -1273,6 +1348,37 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
+ if (nla[NFTA_CHAIN_HOOK]) {
+ struct nft_base_chain *basechain;
+ struct nft_chain_hook hook;
+ struct nf_hook_ops *ops;
+
+ if (!(chain->flags & NFT_BASE_CHAIN))
+ return -EBUSY;
+
+ err = nft_chain_parse_hook(net, nla, afi, &hook,
+ create);
+ if (err < 0)
+ return err;
+
+ basechain = nft_base_chain(chain);
+ if (basechain->type != hook.type) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+
+ for (i = 0; i < afi->nops; i++) {
+ ops = &basechain->ops[i];
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority ||
+ ops->dev != hook.dev) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+ }
+ nft_chain_release_hook(&hook);
+ }
+
if (nla[NFTA_CHAIN_HANDLE] && name) {
struct nft_chain *chain2;
@@ -1320,102 +1426,53 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
return -EOVERFLOW;
if (nla[NFTA_CHAIN_HOOK]) {
- const struct nf_chain_type *type;
+ struct nft_chain_hook hook;
struct nf_hook_ops *ops;
nf_hookfn *hookfn;
- u32 hooknum, priority;
-
- type = chain_type[family][NFT_CHAIN_T_DEFAULT];
- if (nla[NFTA_CHAIN_TYPE]) {
- type = nf_tables_chain_type_lookup(afi,
- nla[NFTA_CHAIN_TYPE],
- create);
- if (IS_ERR(type))
- return PTR_ERR(type);
- }
- err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
- nft_hook_policy);
+ err = nft_chain_parse_hook(net, nla, afi, &hook, create);
if (err < 0)
return err;
- if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
- ha[NFTA_HOOK_PRIORITY] == NULL)
- return -EINVAL;
-
- hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
- if (hooknum >= afi->nhooks)
- return -EINVAL;
- priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
-
- if (!(type->hook_mask & (1 << hooknum)))
- return -EOPNOTSUPP;
- if (!try_module_get(type->owner))
- return -ENOENT;
- hookfn = type->hooks[hooknum];
-
- if (afi->flags & NFT_AF_NEEDS_DEV) {
- char ifname[IFNAMSIZ];
-
- if (!ha[NFTA_HOOK_DEV]) {
- module_put(type->owner);
- return -EOPNOTSUPP;
- }
-
- nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
- dev = dev_get_by_name(net, ifname);
- if (!dev) {
- module_put(type->owner);
- return -ENOENT;
- }
- } else if (ha[NFTA_HOOK_DEV]) {
- module_put(type->owner);
- return -EOPNOTSUPP;
- }
basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
if (basechain == NULL) {
- module_put(type->owner);
- if (dev != NULL)
- dev_put(dev);
+ nft_chain_release_hook(&hook);
return -ENOMEM;
}
- if (dev != NULL)
- strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
+ if (hook.dev != NULL)
+ strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ);
if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
if (IS_ERR(stats)) {
- module_put(type->owner);
+ nft_chain_release_hook(&hook);
kfree(basechain);
- if (dev != NULL)
- dev_put(dev);
return PTR_ERR(stats);
}
basechain->stats = stats;
} else {
stats = netdev_alloc_pcpu_stats(struct nft_stats);
if (stats == NULL) {
- module_put(type->owner);
+ nft_chain_release_hook(&hook);
kfree(basechain);
- if (dev != NULL)
- dev_put(dev);
return -ENOMEM;
}
rcu_assign_pointer(basechain->stats, stats);
}
- basechain->type = type;
+ hookfn = hook.type->hooks[hook.num];
+ basechain->type = hook.type;
chain = &basechain->chain;
for (i = 0; i < afi->nops; i++) {
ops = &basechain->ops[i];
ops->pf = family;
- ops->hooknum = hooknum;
- ops->priority = priority;
+ ops->hooknum = hook.num;
+ ops->priority = hook.priority;
ops->priv = chain;
ops->hook = afi->hooks[ops->hooknum];
- ops->dev = dev;
+ ops->dev = hook.dev;
if (hookfn)
ops->hook = hookfn;
if (afi->hook_ops_init)
@@ -3426,12 +3483,12 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
}
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
- const struct nlattr *attr)
+ const struct nlattr *attr, u32 nlmsg_flags)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
struct nft_data_desc d1, d2;
struct nft_set_ext_tmpl tmpl;
- struct nft_set_ext *ext;
+ struct nft_set_ext *ext, *ext2;
struct nft_set_elem elem;
struct nft_set_binding *binding;
struct nft_userdata *udata;
@@ -3558,9 +3615,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err4;
ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
- err = set->ops->insert(ctx->net, set, &elem);
- if (err < 0)
+ err = set->ops->insert(ctx->net, set, &elem, &ext2);
+ if (err) {
+ if (err == -EEXIST) {
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
+ memcmp(nft_set_ext_data(ext),
+ nft_set_ext_data(ext2), set->dlen) != 0)
+ err = -EBUSY;
+ else if (!(nlmsg_flags & NLM_F_EXCL))
+ err = 0;
+ }
goto err5;
+ }
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
@@ -3616,7 +3683,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
!atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
return -ENFILE;
- err = nft_add_set_elem(&ctx, set, attr);
+ err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags);
if (err < 0) {
atomic_dec(&set->nelems);
break;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 564fa7929ed5..764251d31e46 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -1,395 +1,136 @@
/*
- * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/log2.h>
-#include <linux/jhash.h>
#include <linux/netlink.h>
-#include <linux/workqueue.h>
-#include <linux/rhashtable.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-
-/* We target a hash table size of 4, element hint is 75% of final size */
-#define NFT_HASH_ELEMENT_HINT 3
+#include <net/netfilter/nf_tables_core.h>
+#include <linux/jhash.h>
struct nft_hash {
- struct rhashtable ht;
- struct delayed_work gc_work;
-};
-
-struct nft_hash_elem {
- struct rhash_head node;
- struct nft_set_ext ext;
+ enum nft_registers sreg:8; /* source register: nft_hash_eval() hashes the data starting here */
+ enum nft_registers dreg:8; /* destination register that receives the scaled hash */
+ u8 len; /* number of bytes hashed; nft_hash_init() only accepts 1..U8_MAX */
+ u32 modulus; /* range argument handed to reciprocal_scale(); nft_hash_init() rejects values <= 1 */
+ u32 seed; /* seed handed to jhash(), taken from NFTA_HASH_SEED */
};
-struct nft_hash_cmp_arg {
- const struct nft_set *set;
- const u32 *key;
- u8 genmask;
-};
-
-static const struct rhashtable_params nft_hash_params;
-
-static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
-{
- const struct nft_hash_cmp_arg *arg = data;
-
- return jhash(arg->key, len, seed);
-}
-
-static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
-{
- const struct nft_hash_elem *he = data;
-
- return jhash(nft_set_ext_key(&he->ext), len, seed);
-}
-
-static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
- const void *ptr)
-{
- const struct nft_hash_cmp_arg *x = arg->key;
- const struct nft_hash_elem *he = ptr;
-
- if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
- return 1;
- if (nft_set_elem_expired(&he->ext))
- return 1;
- if (!nft_set_elem_active(&he->ext, x->genmask))
- return 1;
- return 0;
-}
-
-static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
-{
- struct nft_hash *priv = nft_set_priv(set);
- const struct nft_hash_elem *he;
- struct nft_hash_cmp_arg arg = {
- .genmask = nft_genmask_cur(net),
- .set = set,
- .key = key,
- };
-
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
- if (he != NULL)
- *ext = &he->ext;
-
- return !!he;
-}
-
-static bool nft_hash_update(struct nft_set *set, const u32 *key,
- void *(*new)(struct nft_set *,
- const struct nft_expr *,
- struct nft_regs *regs),
- const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_set_ext **ext)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
- struct nft_hash_cmp_arg arg = {
- .genmask = NFT_GENMASK_ANY,
- .set = set,
- .key = key,
- };
-
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
- if (he != NULL)
- goto out;
-
- he = new(set, expr, regs);
- if (he == NULL)
- goto err1;
- if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
- nft_hash_params))
- goto err2;
-out:
- *ext = &he->ext;
- return true;
-
-err2:
- nft_set_elem_destroy(set, he);
-err1:
- return false;
-}
-
-static int nft_hash_insert(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he = elem->priv;
- struct nft_hash_cmp_arg arg = {
- .genmask = nft_genmask_next(net),
- .set = set,
- .key = elem->key.val.data,
- };
-
- return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
- nft_hash_params);
-}
-
-static void nft_hash_activate(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
+static void nft_hash_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt) /* pkt is not used: the input comes from the register file only */
{
- struct nft_hash_elem *he = elem->priv;
+ struct nft_hash *priv = nft_expr_priv(expr);
+ const void *data = &regs->data[priv->sreg]; /* hash input starts at the source register */
- nft_set_elem_change_active(net, set, &he->ext);
- nft_set_elem_clear_busy(&he->ext);
+ regs->data[priv->dreg] =
+ reciprocal_scale(jhash(data, priv->len, priv->seed),
+ priv->modulus); /* jhash over priv->len bytes with priv->seed, then scaled by priv->modulus */
}
-static void *nft_hash_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
- struct nft_hash_cmp_arg arg = {
- .genmask = nft_genmask_next(net),
- .set = set,
- .key = elem->key.val.data,
- };
-
- rcu_read_lock();
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
- if (he != NULL) {
- if (!nft_set_elem_mark_busy(&he->ext) ||
- !nft_is_active(net, &he->ext))
- nft_set_elem_change_active(net, set, &he->ext);
- else
- he = NULL;
- }
- rcu_read_unlock();
-
- return he;
-}
-
-static void nft_hash_remove(const struct nft_set *set,
- const struct nft_set_elem *elem)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he = elem->priv;
-
- rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
-}
-
-static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
- struct nft_set_iter *iter)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
- struct rhashtable_iter hti;
- struct nft_set_elem elem;
- int err;
-
- err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
- iter->err = err;
- if (err)
- return;
-
- err = rhashtable_walk_start(&hti);
- if (err && err != -EAGAIN) {
- iter->err = err;
- goto out;
- }
-
- while ((he = rhashtable_walk_next(&hti))) {
- if (IS_ERR(he)) {
- err = PTR_ERR(he);
- if (err != -EAGAIN) {
- iter->err = err;
- goto out;
- }
-
- continue;
- }
-
- if (iter->count < iter->skip)
- goto cont;
- if (nft_set_elem_expired(&he->ext))
- goto cont;
- if (!nft_set_elem_active(&he->ext, iter->genmask))
- goto cont;
-
- elem.priv = he;
-
- iter->err = iter->fn(ctx, set, iter, &elem);
- if (iter->err < 0)
- goto out;
-
-cont:
- iter->count++;
- }
-
-out:
- rhashtable_walk_stop(&hti);
- rhashtable_walk_exit(&hti);
-}
-
-static void nft_hash_gc(struct work_struct *work)
-{
- struct nft_set *set;
- struct nft_hash_elem *he;
- struct nft_hash *priv;
- struct nft_set_gc_batch *gcb = NULL;
- struct rhashtable_iter hti;
- int err;
-
- priv = container_of(work, struct nft_hash, gc_work.work);
- set = nft_set_container_of(priv);
-
- err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
- if (err)
- goto schedule;
-
- err = rhashtable_walk_start(&hti);
- if (err && err != -EAGAIN)
- goto out;
-
- while ((he = rhashtable_walk_next(&hti))) {
- if (IS_ERR(he)) {
- if (PTR_ERR(he) != -EAGAIN)
- goto out;
- continue;
- }
-
- if (!nft_set_elem_expired(&he->ext))
- continue;
- if (nft_set_elem_mark_busy(&he->ext))
- continue;
-
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb == NULL)
- goto out;
- rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, he);
- }
-out:
- rhashtable_walk_stop(&hti);
- rhashtable_walk_exit(&hti);
-
- nft_set_gc_batch_complete(gcb);
-schedule:
- queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
- nft_set_gc_interval(set));
-}
-
-static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
-{
- return sizeof(struct nft_hash);
-}
-
-static const struct rhashtable_params nft_hash_params = {
- .head_offset = offsetof(struct nft_hash_elem, node),
- .hashfn = nft_hash_key,
- .obj_hashfn = nft_hash_obj,
- .obj_cmpfn = nft_hash_cmp,
- .automatic_shrinking = true,
+static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { /* every hash attribute is declared as NLA_U32 */
+ [NFTA_HASH_SREG] = { .type = NLA_U32 },
+ [NFTA_HASH_DREG] = { .type = NLA_U32 },
+ [NFTA_HASH_LEN] = { .type = NLA_U32 },
+ [NFTA_HASH_MODULUS] = { .type = NLA_U32 },
+ [NFTA_HASH_SEED] = { .type = NLA_U32 },
};
-static int nft_hash_init(const struct nft_set *set,
- const struct nft_set_desc *desc,
+/* Parse and validate the attributes of a hash expression.
+ *
+ * SREG, DREG, LEN, SEED and MODULUS are all mandatory (-EINVAL when one is
+ * missing). LEN must be in 1..U8_MAX and MODULUS must be greater than 1
+ * (-ERANGE otherwise). Returns 0 on success or the negative errno of the
+ * first check that failed, including the source/destination register
+ * validation at the end.
+ */
+static int nft_hash_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
const struct nlattr * const tb[])
{
- struct nft_hash *priv = nft_set_priv(set);
- struct rhashtable_params params = nft_hash_params;
- int err;
+ struct nft_hash *priv = nft_expr_priv(expr);
+ u32 len;
+ int err;
- params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
- params.key_len = set->klen;
+ if (!tb[NFTA_HASH_SREG] ||
+ !tb[NFTA_HASH_DREG] ||
+ !tb[NFTA_HASH_LEN] ||
+ !tb[NFTA_HASH_SEED] ||
+ !tb[NFTA_HASH_MODULUS])
+ return -EINVAL;
- err = rhashtable_init(&priv->ht, &params);
- if (err < 0)
- return err;
+ priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
+ priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
- INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
- if (set->flags & NFT_SET_TIMEOUT)
- queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
- nft_set_gc_interval(set));
- return 0;
-}
+ len = ntohl(nla_get_be32(tb[NFTA_HASH_LEN]));
+ if (len == 0 || len > U8_MAX)
+ return -ERANGE;
-static void nft_hash_elem_destroy(void *ptr, void *arg)
-{
- nft_set_elem_destroy((const struct nft_set *)arg, ptr);
-}
+ priv->len = len;
-static void nft_hash_destroy(const struct nft_set *set)
-{
- struct nft_hash *priv = nft_set_priv(set);
+ priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
+ if (priv->modulus <= 1)
+ return -ERANGE;
- cancel_delayed_work_sync(&priv->gc_work);
- rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
- (void *)set);
+ priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
+
+ /* Check the two registers one after the other: joining the results
+ * with && would collapse them to 0/1 and lose the error code.
+ */
+ err = nft_validate_register_load(priv->sreg, len);
+ if (err < 0)
+ return err;
+
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, sizeof(u32));
}
-static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
- struct nft_set_estimate *est)
+static int nft_hash_dump(struct sk_buff *skb,
+ const struct nft_expr *expr) /* returns 0, or -1 as soon as one nft_dump_register()/nla_put_be32() call returns non-zero */
{
- unsigned int esize;
+ const struct nft_hash *priv = nft_expr_priv(expr);
- esize = sizeof(struct nft_hash_elem);
- if (desc->size) {
- est->size = sizeof(struct nft_hash) +
- roundup_pow_of_two(desc->size * 4 / 3) *
- sizeof(struct nft_hash_elem *) +
- desc->size * esize;
- } else {
- /* Resizing happens when the load drops below 30% or goes
- * above 75%. The average of 52.5% load (approximated by 50%)
- * is used for the size estimation of the hash buckets,
- * meaning we calculate two buckets per element.
- */
- est->size = esize + 2 * sizeof(struct nft_hash_elem *);
- }
+ if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_LEN, htonl(priv->len))) /* the u8 length is emitted as a 32-bit attribute, like the NLA_U32 policy entry */
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
+ goto nla_put_failure;
- est->class = NFT_SET_CLASS_O_1;
+ return 0;
- return true;
+nla_put_failure:
+ return -1;
}
-static struct nft_set_ops nft_hash_ops __read_mostly = {
- .privsize = nft_hash_privsize,
- .elemsize = offsetof(struct nft_hash_elem, ext),
- .estimate = nft_hash_estimate,
+static struct nft_expr_type nft_hash_type; /* forward declaration: the ops below point at the type, and the type points back at the ops */
+static const struct nft_expr_ops nft_hash_ops = {
+ .type = &nft_hash_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)),
+ .eval = nft_hash_eval,
.init = nft_hash_init,
- .destroy = nft_hash_destroy,
- .insert = nft_hash_insert,
- .activate = nft_hash_activate,
- .deactivate = nft_hash_deactivate,
- .remove = nft_hash_remove,
- .lookup = nft_hash_lookup,
- .update = nft_hash_update,
- .walk = nft_hash_walk,
- .features = NFT_SET_MAP | NFT_SET_TIMEOUT,
+ .dump = nft_hash_dump,
+};
+
+static struct nft_expr_type nft_hash_type __read_mostly = {
+ .name = "hash", /* same string as MODULE_ALIAS_NFT_EXPR("hash") at the end of the file */
+ .ops = &nft_hash_ops,
+ .policy = nft_hash_policy,
+ .maxattr = NFTA_HASH_MAX,
.owner = THIS_MODULE,
};
static int __init nft_hash_module_init(void)
{
- return nft_register_set(&nft_hash_ops);
+ return nft_register_expr(&nft_hash_type);
}
static void __exit nft_hash_module_exit(void)
{
- nft_unregister_set(&nft_hash_ops);
+ nft_unregister_expr(&nft_hash_type);
}
module_init(nft_hash_module_init);
module_exit(nft_hash_module_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_SET();
+MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("hash");
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
new file mode 100644
index 000000000000..294745ecb0fc
--- /dev/null
+++ b/net/netfilter/nft_numgen.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/static_key.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
+
+/* Private state of the incremental number generator expression. */
+struct nft_ng_inc {
+ /* destination register the generated value is written to */
+ enum nft_registers dreg:8;
+ /* exclusive upper bound: the counter wraps to 0 instead of reaching it */
+ u32 until;
+ /* current value, advanced with a compare-and-exchange on each evaluation */
+ atomic_t counter;
+};
+
+/*
+ * Advance the shared counter by one, wrapping to 0 once the next value
+ * would reach priv->until, and store the new value in the destination
+ * register.
+ */
+static void nft_ng_inc_eval(const struct nft_expr *expr,
+			    struct nft_regs *regs,
+			    const struct nft_pktinfo *pkt)
+{
+	struct nft_ng_inc *priv = nft_expr_priv(expr);
+	u32 nval, oval;
+
+	/* Lock-free update: retry until the counter was not modified
+	 * between our read and the compare-and-exchange.
+	 */
+	do {
+		oval = atomic_read(&priv->counter);
+		nval = (oval + 1 < priv->until) ? oval + 1 : 0;
+	} while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
+
+	/* Publish the value this evaluation installed. Re-reading
+	 * priv->counter here would race with concurrent evaluations and
+	 * could hand the same number to two packets while skipping another.
+	 */
+	regs->data[priv->dreg] = nval;
+}
+
+/* Netlink attribute policy: each numgen attribute listed here is a 32-bit value. */
+static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
+ [NFTA_NG_DREG] = { .type = NLA_U32 },
+ [NFTA_NG_UNTIL] = { .type = NLA_U32 },
+ [NFTA_NG_TYPE] = { .type = NLA_U32 },
+};
+
+/*
+ * Initialise an incremental generator from its netlink attributes.
+ *
+ * Returns -ERANGE when NFTA_NG_UNTIL is zero; otherwise the counter starts
+ * at 0 and the result of validating a u32 store to the destination
+ * register is returned.
+ */
+static int nft_ng_inc_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_ng_inc *ng = nft_expr_priv(expr);
+	u32 limit = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
+
+	ng->until = limit;
+	if (!limit)
+		return -ERANGE;
+
+	ng->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
+	atomic_set(&ng->counter, 0);
+
+	return nft_validate_register_store(ctx, ng->dreg, NULL,
+					   NFT_DATA_VALUE, sizeof(u32));
+}
+
+/*
+ * Shared dump helper for both generator flavours: emits the destination
+ * register, the upper bound and the generator type, in that order.
+ *
+ * Returns 0 on success, -1 as soon as any attribute cannot be added.
+ */
+static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
+		       u32 until, enum nft_ng_types type)
+{
+	if (nft_dump_register(skb, NFTA_NG_DREG, dreg) ||
+	    nla_put_be32(skb, NFTA_NG_UNTIL, htonl(until)) ||
+	    nla_put_be32(skb, NFTA_NG_TYPE, htonl(type)))
+		return -1;
+
+	return 0;
+}
+
+/* Dump an incremental generator: its register and bound, tagged NFT_NG_INCREMENTAL. */
+static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+ return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_INCREMENTAL);
+}
+
+/* Private state of the random number generator expression. */
+struct nft_ng_random {
+ /* destination register the generated value is written to */
+ enum nft_registers dreg:8;
+ /* range argument handed to reciprocal_scale() on every evaluation */
+ u32 until;
+};
+
+/*
+ * Draw a pseudo-random number from this CPU's generator state, scale it
+ * with reciprocal_scale() against priv->until and store the result in the
+ * destination register.
+ */
+static void nft_ng_random_eval(const struct nft_expr *expr,
+			       struct nft_regs *regs,
+			       const struct nft_pktinfo *pkt)
+{
+	struct nft_ng_random *ng = nft_expr_priv(expr);
+	struct rnd_state *rnd = this_cpu_ptr(&nft_numgen_prandom_state);
+	u32 sample = prandom_u32_state(rnd);
+
+	regs->data[ng->dreg] = reciprocal_scale(sample, ng->until);
+}
+
+/*
+ * Initialise a random generator from its netlink attributes.
+ *
+ * Returns -ERANGE when NFTA_NG_UNTIL is zero; otherwise the per-CPU
+ * generator state is initialised once and the result of validating a u32
+ * store to the destination register is returned.
+ */
+static int nft_ng_random_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr * const tb[])
+{
+	struct nft_ng_random *ng = nft_expr_priv(expr);
+	u32 limit = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
+
+	ng->until = limit;
+	if (!limit)
+		return -ERANGE;
+
+	prandom_init_once(&nft_numgen_prandom_state);
+
+	ng->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
+
+	return nft_validate_register_store(ctx, ng->dreg, NULL,
+					   NFT_DATA_VALUE, sizeof(u32));
+}
+
+/* Dump a random generator: its register and bound, tagged NFT_NG_RANDOM. */
+static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_ng_random *priv = nft_expr_priv(expr);
+
+ return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_RANDOM);
+}
+
+/* Forward declaration: the ops tables below point back at the type. */
+static struct nft_expr_type nft_ng_type;
+/* Expression operations for the incremental generator. */
+static const struct nft_expr_ops nft_ng_inc_ops = {
+ .type = &nft_ng_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
+ .eval = nft_ng_inc_eval,
+ .init = nft_ng_inc_init,
+ .dump = nft_ng_inc_dump,
+};
+
+/* Expression operations for the random generator. */
+static const struct nft_expr_ops nft_ng_random_ops = {
+ .type = &nft_ng_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
+ .eval = nft_ng_random_eval,
+ .init = nft_ng_random_init,
+ .dump = nft_ng_random_dump,
+};
+
+/*
+ * Pick the ops table matching NFTA_NG_TYPE.
+ *
+ * NFTA_NG_DREG, NFTA_NG_UNTIL and NFTA_NG_TYPE must all be present; a
+ * missing attribute or an unknown type yields ERR_PTR(-EINVAL).
+ */
+static const struct nft_expr_ops *
+nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+{
+	if (!(tb[NFTA_NG_DREG] && tb[NFTA_NG_UNTIL] && tb[NFTA_NG_TYPE]))
+		return ERR_PTR(-EINVAL);
+
+	switch (ntohl(nla_get_be32(tb[NFTA_NG_TYPE]))) {
+	case NFT_NG_INCREMENTAL:
+		return &nft_ng_inc_ops;
+	case NFT_NG_RANDOM:
+		return &nft_ng_random_ops;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+}
+
+/*
+ * The "numgen" expression type. It has no fixed ops table; select_ops
+ * chooses between the incremental and random implementations.
+ */
+static struct nft_expr_type nft_ng_type __read_mostly = {
+ .name = "numgen",
+ .select_ops = &nft_ng_select_ops,
+ .policy = nft_ng_policy,
+ .maxattr = NFTA_NG_MAX,
+ .owner = THIS_MODULE,
+};
+
+/* Module entry point: register the "numgen" expression type. */
+static int __init nft_ng_module_init(void)
+{
+ return nft_register_expr(&nft_ng_type);
+}
+
+/* Module exit point: unregister the "numgen" expression type. */
+static void __exit nft_ng_module_exit(void)
+{
+ nft_unregister_expr(&nft_ng_type);
+}
+
+module_init(nft_ng_module_init);
+module_exit(nft_ng_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("numgen");
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
new file mode 100644
index 000000000000..6eafbf987ed9
--- /dev/null
+++ b/net/netfilter/nft_quota.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* Private state of the quota expression. */
+struct nft_quota {
+ /* configured quota in bytes, as received in NFTA_QUOTA_BYTES */
+ u64 quota;
+ /* true when NFT_QUOTA_F_INV was set at init time */
+ bool invert;
+ /* bytes still available; starts at quota and is reduced by each packet's length */
+ atomic64_t remain;
+};
+
+/*
+ * Charge the packet length against the remaining quota and return the new
+ * remainder; a negative result means the quota has been exceeded.
+ *
+ * The remainder is returned as s64: narrowing the 64-bit value to long
+ * would truncate it on 32-bit builds and could report the wrong sign for
+ * quotas larger than 2 GiB.
+ */
+static inline s64 nft_quota(struct nft_quota *priv,
+			    const struct nft_pktinfo *pkt)
+{
+	return atomic64_sub_return(pkt->skb->len, &priv->remain);
+}
+
+/*
+ * Account the packet and break rule evaluation when the quota state is
+ * not the one the rule asks for: over quota for a regular rule, still
+ * within quota for an inverted (NFT_QUOTA_F_INV) one.
+ */
+static void nft_quota_eval(const struct nft_expr *expr,
+			   struct nft_regs *regs,
+			   const struct nft_pktinfo *pkt)
+{
+	struct nft_quota *priv = nft_expr_priv(expr);
+	bool overquota = nft_quota(priv, pkt) < 0;
+
+	/* XOR with the invert flag so that inversion really flips the
+	 * outcome; "&& !invert" made inverted rules never break at all.
+	 */
+	if (overquota ^ priv->invert)
+		regs->verdict.code = NFT_BREAK;
+}
+
+/* Netlink attribute policy: 64-bit byte count and 32-bit flags word. */
+static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = {
+ [NFTA_QUOTA_BYTES] = { .type = NLA_U64 },
+ [NFTA_QUOTA_FLAGS] = { .type = NLA_U32 },
+};
+
+/*
+ * Initialise a quota expression from its netlink attributes.
+ *
+ * Returns -EINVAL when NFTA_QUOTA_BYTES is missing or NFTA_QUOTA_FLAGS
+ * carries any bit other than NFT_QUOTA_F_INV, -EOVERFLOW when the byte
+ * count exceeds S64_MAX, and 0 on success.
+ */
+static int nft_quota_init(const struct nft_ctx *ctx,
+			  const struct nft_expr *expr,
+			  const struct nlattr * const tb[])
+{
+	struct nft_quota *priv = nft_expr_priv(expr);
+	bool inverted = false;
+	u64 bytes;
+
+	if (!tb[NFTA_QUOTA_BYTES])
+		return -EINVAL;
+
+	bytes = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_BYTES]));
+	if (bytes > S64_MAX)
+		return -EOVERFLOW;
+
+	if (tb[NFTA_QUOTA_FLAGS]) {
+		u32 flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS]));
+
+		if (flags & ~NFT_QUOTA_F_INV)
+			return -EINVAL;
+		inverted = !!(flags & NFT_QUOTA_F_INV);
+	}
+
+	priv->quota = bytes;
+	priv->invert = inverted;
+	atomic64_set(&priv->remain, bytes);
+
+	return 0;
+}
+
+/*
+ * Dump the configured byte quota and the flags word (NFT_QUOTA_F_INV when
+ * the expression is inverted, 0 otherwise).
+ *
+ * Returns 0 on success, -1 when an attribute cannot be added.
+ */
+static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_quota *priv = nft_expr_priv(expr);
+	u32 flags = 0;
+
+	if (priv->invert)
+		flags = NFT_QUOTA_F_INV;
+
+	if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota),
+			 NFTA_QUOTA_PAD))
+		return -1;
+	if (nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags)))
+		return -1;
+
+	return 0;
+}
+
+/* Forward declaration: the ops table below points back at the type. */
+static struct nft_expr_type nft_quota_type;
+/* Expression operations for the quota expression. */
+static const struct nft_expr_ops nft_quota_ops = {
+ .type = &nft_quota_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_quota)),
+ .eval = nft_quota_eval,
+ .init = nft_quota_init,
+ .dump = nft_quota_dump,
+};
+
+/* The "quota" expression type; flagged NFT_EXPR_STATEFUL. */
+static struct nft_expr_type nft_quota_type __read_mostly = {
+ .name = "quota",
+ .ops = &nft_quota_ops,
+ .policy = nft_quota_policy,
+ .maxattr = NFTA_QUOTA_MAX,
+ .flags = NFT_EXPR_STATEFUL,
+ .owner = THIS_MODULE,
+};
+
+/* Module entry point: register the "quota" expression type. */
+static int __init nft_quota_module_init(void)
+{
+ return nft_register_expr(&nft_quota_type);
+}
+
+/* Module exit point: unregister the "quota" expression type. */
+static void __exit nft_quota_module_exit(void)
+{
+ nft_unregister_expr(&nft_quota_type);
+}
+
+module_init(nft_quota_module_init);
+module_exit(nft_quota_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("quota");
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
new file mode 100644
index 000000000000..3794cb2fc788
--- /dev/null
+++ b/net/netfilter/nft_set_hash.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/log2.h>
+#include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/workqueue.h>
+#include <linux/rhashtable.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* We target a hash table size of 4, element hint is 75% of final size */
+#define NFT_HASH_ELEMENT_HINT 3
+
+/* Per-set private data of the hash set backend. */
+struct nft_hash {
+ /* resizable hash table holding the set elements */
+ struct rhashtable ht;
+ /* delayed work that runs nft_hash_gc() */
+ struct delayed_work gc_work;
+};
+
+/* One set element as stored in the hash table. */
+struct nft_hash_elem {
+ /* hash table linkage (nft_hash_params.head_offset points here) */
+ struct rhash_head node;
+ /* set extension area; the element key is read via nft_set_ext_key() */
+ struct nft_set_ext ext;
+};
+
+/* Lookup argument handed to the hash and compare callbacks. */
+struct nft_hash_cmp_arg {
+ /* set being searched; supplies the key length for the comparison */
+ const struct nft_set *set;
+ /* key to look for */
+ const u32 *key;
+ /* generation mask an element must be active in to match */
+ u8 genmask;
+};
+
+static const struct rhashtable_params nft_hash_params;
+
+/* Key hash callback: hashes the key carried in a struct nft_hash_cmp_arg. */
+static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
+{
+ const struct nft_hash_cmp_arg *arg = data;
+
+ return jhash(arg->key, len, seed);
+}
+
+/* Object hash callback: hashes the key stored in an element's extension area. */
+static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct nft_hash_elem *he = data;
+
+ return jhash(nft_set_ext_key(&he->ext), len, seed);
+}
+
+/*
+ * Compare callback: returns 0 when the element matches the lookup
+ * argument, 1 otherwise. An element matches only if its key equals the
+ * requested key, it has not expired and it is active in the requested
+ * generation mask.
+ */
+static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
+			       const void *ptr)
+{
+	const struct nft_hash_cmp_arg *want = arg->key;
+	const struct nft_hash_elem *he = ptr;
+	bool mismatch;
+
+	mismatch = memcmp(nft_set_ext_key(&he->ext), want->key,
+			  want->set->klen) ||
+		   nft_set_elem_expired(&he->ext) ||
+		   !nft_set_elem_active(&he->ext, want->genmask);
+
+	return mismatch ? 1 : 0;
+}
+
+/*
+ * Look up @key among the elements active in the current generation.
+ *
+ * On a hit, *ext is pointed at the element's extension area and true is
+ * returned; on a miss *ext is left untouched and false is returned.
+ */
+static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
+			    const u32 *key, const struct nft_set_ext **ext)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_cmp_arg probe = {
+		.genmask = nft_genmask_cur(net),
+		.set	 = set,
+		.key	 = key,
+	};
+	const struct nft_hash_elem *he;
+
+	he = rhashtable_lookup_fast(&priv->ht, &probe, nft_hash_params);
+	if (!he)
+		return false;
+
+	*ext = &he->ext;
+	return true;
+}
+
+/*
+ * Find the element for @key in any generation or, if there is none,
+ * create one through @new and insert it.
+ *
+ * Returns true with *ext pointing at the element's extension area, or
+ * false when the element could not be created or inserted (a freshly
+ * created element is destroyed again if the insertion fails).
+ */
+static bool nft_hash_update(struct nft_set *set, const u32 *key,
+			    void *(*new)(struct nft_set *,
+					 const struct nft_expr *,
+					 struct nft_regs *regs),
+			    const struct nft_expr *expr,
+			    struct nft_regs *regs,
+			    const struct nft_set_ext **ext)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_cmp_arg probe = {
+		.genmask = NFT_GENMASK_ANY,
+		.set	 = set,
+		.key	 = key,
+	};
+	struct nft_hash_elem *he;
+
+	he = rhashtable_lookup_fast(&priv->ht, &probe, nft_hash_params);
+	if (he == NULL) {
+		he = new(set, expr, regs);
+		if (he == NULL)
+			return false;
+
+		if (rhashtable_lookup_insert_key(&priv->ht, &probe, &he->node,
+						 nft_hash_params)) {
+			nft_set_elem_destroy(set, he);
+			return false;
+		}
+	}
+
+	*ext = &he->ext;
+	return true;
+}
+
+/*
+ * Insert @elem unless an element with the same key is already active in
+ * the next generation.
+ *
+ * Returns 0 on success, -EEXIST with *ext pointing at the existing
+ * element's extension area on a clash, or the error reported by the hash
+ * table.
+ */
+static int nft_hash_insert(const struct net *net, const struct nft_set *set,
+			   const struct nft_set_elem *elem,
+			   struct nft_set_ext **ext)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he = elem->priv;
+	struct nft_hash_cmp_arg probe = {
+		.genmask = nft_genmask_next(net),
+		.set	 = set,
+		.key	 = elem->key.val.data,
+	};
+	struct nft_hash_elem *clash;
+
+	clash = rhashtable_lookup_get_insert_key(&priv->ht, &probe, &he->node,
+						 nft_hash_params);
+	if (!clash)
+		return 0;
+	if (IS_ERR(clash))
+		return PTR_ERR(clash);
+
+	*ext = &clash->ext;
+	return -EEXIST;
+}
+
+/* Toggle the element's active state and clear its busy mark. */
+static void nft_hash_activate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash_elem *he = elem->priv;
+
+ nft_set_elem_change_active(net, set, &he->ext);
+ nft_set_elem_clear_busy(&he->ext);
+}
+
+/*
+ * Look up the element matching @elem's key in the next generation and
+ * toggle its active state.
+ *
+ * Returns the element, or NULL when none was found or when the element
+ * was already marked busy while still being active.
+ */
+static void *nft_hash_deactivate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_next(net),
+ .set = set,
+ .key = elem->key.val.data,
+ };
+
+ rcu_read_lock();
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL) {
+ /* NOTE(review): presumably nft_set_elem_mark_busy() returns
+ * non-zero when the element was already busy - confirm.
+ */
+ if (!nft_set_elem_mark_busy(&he->ext) ||
+ !nft_is_active(net, &he->ext))
+ nft_set_elem_change_active(net, set, &he->ext);
+ else
+ he = NULL;
+ }
+ rcu_read_unlock();
+
+ return he;
+}
+
+/* Unlink @elem's hash element from the table; the element itself is not freed here. */
+static void nft_hash_remove(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->priv;
+
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+}
+
+/*
+ * Iterate over the set and call iter->fn on every element that is neither
+ * expired nor inactive for iter->genmask, after skipping the first
+ * iter->skip entries. The outcome is reported through iter->err.
+ */
+static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct rhashtable_iter hti;
+ struct nft_set_elem elem;
+ int err;
+
+ err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+ iter->err = err;
+ if (err)
+ return;
+
+ /* -EAGAIN from the walker is tolerated here and in the loop below;
+ * presumably it signals a concurrent table resize - confirm.
+ */
+ err = rhashtable_walk_start(&hti);
+ if (err && err != -EAGAIN) {
+ iter->err = err;
+ goto out;
+ }
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+ err = PTR_ERR(he);
+ if (err != -EAGAIN) {
+ iter->err = err;
+ goto out;
+ }
+
+ continue;
+ }
+
+ if (iter->count < iter->skip)
+ goto cont;
+ if (nft_set_elem_expired(&he->ext))
+ goto cont;
+ if (!nft_set_elem_active(&he->ext, iter->genmask))
+ goto cont;
+
+ elem.priv = he;
+
+ iter->err = iter->fn(ctx, set, iter, &elem);
+ if (iter->err < 0)
+ goto out;
+
+ /* Every entry visited is counted, including skipped ones. */
+cont:
+ iter->count++;
+ }
+
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+}
+
+/*
+ * Garbage collection work: remove expired elements from the table, hand
+ * them to a gc batch, then re-queue this work after the set's gc interval.
+ */
+static void nft_hash_gc(struct work_struct *work)
+{
+ struct nft_set *set;
+ struct nft_hash_elem *he;
+ struct nft_hash *priv;
+ struct nft_set_gc_batch *gcb = NULL;
+ struct rhashtable_iter hti;
+ int err;
+
+ priv = container_of(work, struct nft_hash, gc_work.work);
+ set = nft_set_container_of(priv);
+
+ /* If the walker cannot be set up, skip this round but still re-arm. */
+ err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+ if (err)
+ goto schedule;
+
+ err = rhashtable_walk_start(&hti);
+ if (err && err != -EAGAIN)
+ goto out;
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+ if (PTR_ERR(he) != -EAGAIN)
+ goto out;
+ continue;
+ }
+
+ if (!nft_set_elem_expired(&he->ext))
+ continue;
+ /* NOTE(review): presumably non-zero means the element was
+ * already busy, so it is left alone - confirm.
+ */
+ if (nft_set_elem_mark_busy(&he->ext))
+ continue;
+
+ gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+ if (gcb == NULL)
+ goto out;
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+ atomic_dec(&set->nelems);
+ nft_set_gc_batch_add(gcb, he);
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ nft_set_gc_batch_complete(gcb);
+schedule:
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+}
+
+/* Size of the backend's private data; the netlink attributes are not consulted. */
+static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
+{
+ return sizeof(struct nft_hash);
+}
+
+/*
+ * Hash table parameters shared by all hash sets. nft_hash_init() copies
+ * this template and fills in the per-set element hint and key length.
+ */
+static const struct rhashtable_params nft_hash_params = {
+ .head_offset = offsetof(struct nft_hash_elem, node),
+ .hashfn = nft_hash_key,
+ .obj_hashfn = nft_hash_obj,
+ .obj_cmpfn = nft_hash_cmp,
+ .automatic_shrinking = true,
+};
+
+/*
+ * Set up the hash table for a new set and, for sets with timeouts, start
+ * the periodic garbage collection work.
+ *
+ * Returns 0 on success or the negative error from rhashtable_init().
+ */
+static int nft_hash_init(const struct nft_set *set,
+			 const struct nft_set_desc *desc,
+			 const struct nlattr * const tb[])
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct rhashtable_params params = nft_hash_params;
+	int err;
+
+	/* Fall back to the default hint when no size was requested. */
+	params.nelem_hint = desc->size ? desc->size : NFT_HASH_ELEMENT_HINT;
+	params.key_len = set->klen;
+
+	err = rhashtable_init(&priv->ht, &params);
+	if (err < 0)
+		return err;
+
+	INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+	if (!(set->flags & NFT_SET_TIMEOUT))
+		return 0;
+
+	queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+			   nft_set_gc_interval(set));
+	return 0;
+}
+
+/* Per-element callback for table teardown: @arg is the set, @ptr the element. */
+static void nft_hash_elem_destroy(void *ptr, void *arg)
+{
+ nft_set_elem_destroy((const struct nft_set *)arg, ptr);
+}
+
+/*
+ * Tear down a set: stop the garbage collection work first, then free the
+ * table and destroy every remaining element.
+ */
+static void nft_hash_destroy(const struct nft_set *set)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+
+ cancel_delayed_work_sync(&priv->gc_work);
+ rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
+ (void *)set);
+}
+
+/*
+ * Estimate the memory footprint of a hash set and report its lookup
+ * class. With a known size the whole set is estimated; without one only
+ * the per-element cost is reported. Always returns true.
+ */
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+			      struct nft_set_estimate *est)
+{
+	const unsigned int esize = sizeof(struct nft_hash_elem);
+
+	est->class = NFT_SET_CLASS_O_1;
+
+	if (!desc->size) {
+		/* Resizing happens when the load drops below 30% or goes
+		 * above 75%. The average of 52.5% load (approximated by 50%)
+		 * is used for the size estimation of the hash buckets,
+		 * meaning we calculate two buckets per element.
+		 */
+		est->size = esize + 2 * sizeof(struct nft_hash_elem *);
+		return true;
+	}
+
+	est->size = sizeof(struct nft_hash) +
+		    roundup_pow_of_two(desc->size * 4 / 3) *
+		    sizeof(struct nft_hash_elem *) +
+		    desc->size * esize;
+	return true;
+}
+
+/*
+ * Set backend operations of the hash set. elemsize is the offset of the
+ * extension area inside struct nft_hash_elem; the backend advertises map
+ * and timeout support.
+ */
+static struct nft_set_ops nft_hash_ops __read_mostly = {
+ .privsize = nft_hash_privsize,
+ .elemsize = offsetof(struct nft_hash_elem, ext),
+ .estimate = nft_hash_estimate,
+ .init = nft_hash_init,
+ .destroy = nft_hash_destroy,
+ .insert = nft_hash_insert,
+ .activate = nft_hash_activate,
+ .deactivate = nft_hash_deactivate,
+ .remove = nft_hash_remove,
+ .lookup = nft_hash_lookup,
+ .update = nft_hash_update,
+ .walk = nft_hash_walk,
+ .features = NFT_SET_MAP | NFT_SET_TIMEOUT,
+ .owner = THIS_MODULE,
+};
+
+/* Module entry point: register the hash set backend. */
+static int __init nft_hash_module_init(void)
+{
+ return nft_register_set(&nft_hash_ops);
+}
+
+/* Module exit point: unregister the hash set backend. */
+static void __exit nft_hash_module_exit(void)
+{
+ nft_unregister_set(&nft_hash_ops);
+}
+
+module_init(nft_hash_module_init);
+module_exit(nft_hash_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_set_rbtree.c
index ffe9ae062d23..38b5bda242f8 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -96,7 +96,8 @@ out:
}
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
- struct nft_rbtree_elem *new)
+ struct nft_rbtree_elem *new,
+ struct nft_set_ext **ext)
{
struct nft_rbtree *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
@@ -124,8 +125,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
else if (!nft_rbtree_interval_end(rbe) &&
nft_rbtree_interval_end(new))
p = &parent->rb_right;
- else
+ else {
+ *ext = &rbe->ext;
return -EEXIST;
+ }
}
}
}
@@ -135,13 +138,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
}
static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **ext)
{
struct nft_rbtree_elem *rbe = elem->priv;
int err;
spin_lock_bh(&nft_rbtree_lock);
- err = __nft_rbtree_insert(net, set, rbe);
+ err = __nft_rbtree_insert(net, set, rbe, ext);
spin_unlock_bh(&nft_rbtree_lock);
return err;
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 188404b9b002..a3b8f697cfc5 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -233,10 +233,8 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
return false;
if (info->match_flags & XT_CONNTRACK_EXPIRES) {
- unsigned long expires = 0;
+ unsigned long expires = nf_ct_expires(ct) / HZ;
- if (timer_pending(&ct->timeout))
- expires = (ct->timeout.expires - jiffies) / HZ;
if ((expires >= info->expires_min &&
expires <= info->expires_max) ^
!(info->invert_flags & XT_CONNTRACK_EXPIRES))
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index e5f18988aee0..bb33598e4530 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -107,8 +107,8 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
info->invert & XT_PHYSDEV_OP_BRIDGED) &&
par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
- pr_info("using --physdev-out and --physdev-is-out are only"
- "supported in the FORWARD and POSTROUTING chains with"
+ pr_info("using --physdev-out and --physdev-is-out are only "
+ "supported in the FORWARD and POSTROUTING chains with "
"bridged traffic.\n");
if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
return -EINVAL;