summaryrefslogtreecommitdiff
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/Kconfig9
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/core.c351
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c11
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c46
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c57
-rw-r--r--net/netfilter/nf_conntrack_expect.c70
-rw-r--r--net/netfilter/nf_conntrack_extend.c2
-rw-r--r--net/netfilter/nf_conntrack_helper.c34
-rw-r--r--net/netfilter/nf_conntrack_l3proto_generic.c7
-rw-r--r--net/netfilter/nf_conntrack_netlink.c106
-rw-r--r--net/netfilter/nf_conntrack_pptp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto.c90
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c20
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c21
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c16
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c20
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c20
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c18
-rw-r--r--net/netfilter/nf_conntrack_sip.c6
-rw-r--r--net/netfilter/nf_conntrack_standalone.c103
-rw-r--r--net/netfilter/nf_internals.h10
-rw-r--r--net/netfilter/nf_nat_core.c4
-rw-r--r--net/netfilter/nf_nat_redirect.c6
-rw-r--r--net/netfilter/nf_queue.c68
-rw-r--r--net/netfilter/nf_sockopt.c2
-rw-r--r--net/netfilter/nf_tables_api.c515
-rw-r--r--net/netfilter/nf_tables_core.c28
-rw-r--r--net/netfilter/nf_tables_trace.c42
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c22
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nfnetlink_queue.c21
-rw-r--r--net/netfilter/nft_counter.c20
-rw-r--r--net/netfilter/nft_ct.c18
-rw-r--r--net/netfilter/nft_exthdr.c213
-rw-r--r--net/netfilter/nft_fib_netdev.c87
-rw-r--r--net/netfilter/nft_limit.c148
-rw-r--r--net/netfilter/nft_objref.c7
-rw-r--r--net/netfilter/nft_payload.c2
-rw-r--r--net/netfilter/nft_quota.c20
-rw-r--r--net/netfilter/nft_rt.c73
-rw-r--r--net/netfilter/nft_set_rbtree.c49
-rw-r--r--net/netfilter/x_tables.c14
-rw-r--r--net/netfilter/xt_CT.c2
-rw-r--r--net/netfilter/xt_NETMAP.c8
-rw-r--r--net/netfilter/xt_TCPMSS.c2
-rw-r--r--net/netfilter/xt_TPROXY.c10
-rw-r--r--net/netfilter/xt_addrtype.c3
-rw-r--r--net/netfilter/xt_connlimit.c26
-rw-r--r--net/netfilter/xt_hashlimit.c285
-rw-r--r--net/netfilter/xt_nat.c20
-rw-r--r--net/netfilter/xt_osf.c2
-rw-r--r--net/netfilter/xt_recent.c2
59 files changed, 1898 insertions, 880 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9b28864cc36a..e4a13cc8a2e7 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -636,6 +636,15 @@ config NFT_FWD_NETDEV
help
This option enables packet forwarding for the "netdev" family.
+config NFT_FIB_NETDEV
+ depends on NFT_FIB_IPV4
+ depends on NFT_FIB_IPV6
+ tristate "Netfilter nf_tables netdev fib lookups support"
+ help
+ This option allows using the FIB expression from the netdev table.
+ The lookup will be delegated to the IPv4 or IPv6 FIB depending
+ on the protocol of the packet.
+
endif # NF_TABLES_NETDEV
endif # NF_TABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 913380919301..d3891c93edd6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_NFT_REDIR) += nft_redir.o
obj-$(CONFIG_NFT_HASH) += nft_hash.o
obj-$(CONFIG_NFT_FIB) += nft_fib.o
obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
+obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 974cf2a3795a..04fe25abc5f6 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -21,7 +21,7 @@
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
-#include <linux/slab.h>
+#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -62,10 +62,182 @@ EXPORT_SYMBOL(nf_hooks_needed);
#endif
static DEFINE_MUTEX(nf_hook_mutex);
+
+/* max hooks per family/hooknum */
+#define MAX_HOOK_COUNT 1024
+
#define nf_entry_dereference(e) \
rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
-static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
+static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
+{
+ struct nf_hook_entries *e;
+ size_t alloc = sizeof(*e) +
+ sizeof(struct nf_hook_entry) * num +
+ sizeof(struct nf_hook_ops *) * num;
+
+ if (num == 0)
+ return NULL;
+
+ e = kvzalloc(alloc, GFP_KERNEL);
+ if (e)
+ e->num_hook_entries = num;
+ return e;
+}
+
+static unsigned int accept_all(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
+}
+
+static const struct nf_hook_ops dummy_ops = {
+ .hook = accept_all,
+ .priority = INT_MIN,
+};
+
+static struct nf_hook_entries *
+nf_hook_entries_grow(const struct nf_hook_entries *old,
+ const struct nf_hook_ops *reg)
+{
+ unsigned int i, alloc_entries, nhooks, old_entries;
+ struct nf_hook_ops **orig_ops = NULL;
+ struct nf_hook_ops **new_ops;
+ struct nf_hook_entries *new;
+ bool inserted = false;
+
+ alloc_entries = 1;
+ old_entries = old ? old->num_hook_entries : 0;
+
+ if (old) {
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+
+ for (i = 0; i < old_entries; i++) {
+ if (orig_ops[i] != &dummy_ops)
+ alloc_entries++;
+ }
+ }
+
+ if (alloc_entries > MAX_HOOK_COUNT)
+ return ERR_PTR(-E2BIG);
+
+ new = allocate_hook_entries_size(alloc_entries);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ new_ops = nf_hook_entries_get_hook_ops(new);
+
+ i = 0;
+ nhooks = 0;
+ while (i < old_entries) {
+ if (orig_ops[i] == &dummy_ops) {
+ ++i;
+ continue;
+ }
+ if (inserted || reg->priority > orig_ops[i]->priority) {
+ new_ops[nhooks] = (void *)orig_ops[i];
+ new->hooks[nhooks] = old->hooks[i];
+ i++;
+ } else {
+ new_ops[nhooks] = (void *)reg;
+ new->hooks[nhooks].hook = reg->hook;
+ new->hooks[nhooks].priv = reg->priv;
+ inserted = true;
+ }
+ nhooks++;
+ }
+
+ if (!inserted) {
+ new_ops[nhooks] = (void *)reg;
+ new->hooks[nhooks].hook = reg->hook;
+ new->hooks[nhooks].priv = reg->priv;
+ }
+
+ return new;
+}
+
+static void hooks_validate(const struct nf_hook_entries *hooks)
+{
+#ifdef CONFIG_DEBUG_KERNEL
+ struct nf_hook_ops **orig_ops;
+ int prio = INT_MIN;
+ size_t i = 0;
+
+ orig_ops = nf_hook_entries_get_hook_ops(hooks);
+
+ for (i = 0; i < hooks->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ continue;
+
+ WARN_ON(orig_ops[i]->priority < prio);
+
+ if (orig_ops[i]->priority > prio)
+ prio = orig_ops[i]->priority;
+ }
+#endif
+}
+
+/*
+ * __nf_hook_entries_try_shrink - try to shrink hook array
+ *
+ * @pp -- location of hook blob
+ *
+ * Hook unregistration must always succeed, so to-be-removed hooks
+ * are replaced by a dummy one that will just move to next hook.
+ *
+ * This counts the current dummy hooks, attempts to allocate new blob,
+ * copies the live hooks, then replaces and discards old one.
+ *
+ * return values:
+ *
+ * Returns address to free, or NULL.
+ */
+static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp)
+{
+ struct nf_hook_entries *old, *new = NULL;
+ unsigned int i, j, skip = 0, hook_entries;
+ struct nf_hook_ops **orig_ops;
+ struct nf_hook_ops **new_ops;
+
+ old = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!old))
+ return NULL;
+
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+ for (i = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ skip++;
+ }
+
+ /* if skip == hook_entries all hooks have been removed */
+ hook_entries = old->num_hook_entries;
+ if (skip == hook_entries)
+ goto out_assign;
+
+ if (WARN_ON(skip == 0))
+ return NULL;
+
+ hook_entries -= skip;
+ new = allocate_hook_entries_size(hook_entries);
+ if (!new)
+ return NULL;
+
+ new_ops = nf_hook_entries_get_hook_ops(new);
+ for (i = 0, j = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ continue;
+ new->hooks[j] = old->hooks[i];
+ new_ops[j] = (void *)orig_ops[i];
+ j++;
+ }
+ hooks_validate(new);
+out_assign:
+ rcu_assign_pointer(*pp, new);
+ return old;
+}
+
+static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
{
if (reg->pf != NFPROTO_NETDEV)
return net->nf.hooks[reg->pf]+reg->hooknum;
@@ -76,13 +248,14 @@ static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const st
return &reg->dev->nf_hooks_ingress;
}
#endif
+ WARN_ON_ONCE(1);
return NULL;
}
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry __rcu **pp;
- struct nf_hook_entry *entry, *p;
+ struct nf_hook_entries *p, *new_hooks;
+ struct nf_hook_entries __rcu **pp;
if (reg->pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
@@ -98,23 +271,19 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
if (!pp)
return -EINVAL;
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- return -ENOMEM;
-
- nf_hook_entry_init(entry, reg);
-
mutex_lock(&nf_hook_mutex);
- /* Find the spot in the list */
- for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
- if (reg->priority < nf_hook_entry_priority(p))
- break;
- }
- rcu_assign_pointer(entry->next, p);
- rcu_assign_pointer(*pp, entry);
+ p = nf_entry_dereference(*pp);
+ new_hooks = nf_hook_entries_grow(p, reg);
+
+ if (!IS_ERR(new_hooks))
+ rcu_assign_pointer(*pp, new_hooks);
mutex_unlock(&nf_hook_mutex);
+ if (IS_ERR(new_hooks))
+ return PTR_ERR(new_hooks);
+
+ hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_inc_ingress_queue();
@@ -122,48 +291,74 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
#ifdef HAVE_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
+ synchronize_net();
+ BUG_ON(p == new_hooks);
+ kvfree(p);
return 0;
}
EXPORT_SYMBOL(nf_register_net_hook);
-static struct nf_hook_entry *
-__nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+/*
+ * __nf_unregister_net_hook - remove a hook from blob
+ *
+ * @oldp: current address of hook blob
+ * @unreg: hook to unregister
+ *
+ * This cannot fail, hook unregistration must always succeed.
+ * Therefore replace the to-be-removed hook with a dummy hook.
+ */
+static void __nf_unregister_net_hook(struct nf_hook_entries *old,
+ const struct nf_hook_ops *unreg)
{
- struct nf_hook_entry __rcu **pp;
- struct nf_hook_entry *p;
-
- pp = nf_hook_entry_head(net, reg);
- if (WARN_ON_ONCE(!pp))
- return NULL;
+ struct nf_hook_ops **orig_ops;
+ bool found = false;
+ unsigned int i;
- mutex_lock(&nf_hook_mutex);
- for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
- if (nf_hook_entry_ops(p) == reg) {
- rcu_assign_pointer(*pp, p->next);
- break;
- }
- }
- mutex_unlock(&nf_hook_mutex);
- if (!p) {
- WARN(1, "nf_unregister_net_hook: hook not found!\n");
- return NULL;
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+ for (i = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] != unreg)
+ continue;
+ WRITE_ONCE(old->hooks[i].hook, accept_all);
+ WRITE_ONCE(orig_ops[i], &dummy_ops);
+ found = true;
+ break;
}
+
+ if (found) {
#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
- net_dec_ingress_queue();
+ if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
+ net_dec_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
- static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
+ static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
#endif
-
- return p;
+ } else {
+ WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
+ }
}
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg);
+ struct nf_hook_entries __rcu **pp;
+ struct nf_hook_entries *p;
unsigned int nfq;
+ pp = nf_hook_entry_head(net, reg);
+ if (!pp)
+ return;
+
+ mutex_lock(&nf_hook_mutex);
+
+ p = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!p)) {
+ mutex_unlock(&nf_hook_mutex);
+ return;
+ }
+
+ __nf_unregister_net_hook(p, reg);
+
+ p = __nf_hook_entries_try_shrink(pp);
+ mutex_unlock(&nf_hook_mutex);
if (!p)
return;
@@ -173,7 +368,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
nfq = nf_queue_nf_hook_drop(net);
if (nfq)
synchronize_net();
- kfree(p);
+ kvfree(p);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
@@ -200,26 +395,59 @@ EXPORT_SYMBOL(nf_register_net_hooks);
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
unsigned int hookcount)
{
- struct nf_hook_entry *to_free[16];
- unsigned int i, n, nfq;
+ struct nf_hook_entries *to_free[16], *p;
+ struct nf_hook_entries __rcu **pp;
+ unsigned int i, j, n;
+
+ mutex_lock(&nf_hook_mutex);
+ for (i = 0; i < hookcount; i++) {
+ pp = nf_hook_entry_head(net, &reg[i]);
+ if (!pp)
+ continue;
+
+ p = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!p))
+ continue;
+ __nf_unregister_net_hook(p, &reg[i]);
+ }
+ mutex_unlock(&nf_hook_mutex);
do {
n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
- for (i = 0; i < n; i++)
- to_free[i] = __nf_unregister_net_hook(net, &reg[i]);
+ mutex_lock(&nf_hook_mutex);
- synchronize_net();
+ for (i = 0, j = 0; i < hookcount && j < n; i++) {
+ pp = nf_hook_entry_head(net, &reg[i]);
+ if (!pp)
+ continue;
+
+ p = nf_entry_dereference(*pp);
+ if (!p)
+ continue;
+
+ to_free[j] = __nf_hook_entries_try_shrink(pp);
+ if (to_free[j])
+ ++j;
+ }
+
+ mutex_unlock(&nf_hook_mutex);
+
+ if (j) {
+ unsigned int nfq;
- /* need 2nd synchronize_net() if nfqueue is used, skb
- * can get reinjected right before nf_queue_hook_drop()
- */
- nfq = nf_queue_nf_hook_drop(net);
- if (nfq)
synchronize_net();
- for (i = 0; i < n; i++)
- kfree(to_free[i]);
+ /* need 2nd synchronize_net() if nfqueue is used, skb
+ * can get reinjected right before nf_queue_hook_drop()
+ */
+ nfq = nf_queue_nf_hook_drop(net);
+ if (nfq)
+ synchronize_net();
+
+ for (i = 0; i < j; i++)
+ kvfree(to_free[i]);
+ }
reg += n;
hookcount -= n;
@@ -230,16 +458,15 @@ EXPORT_SYMBOL(nf_unregister_net_hooks);
/* Returns 1 if okfn() needs to be executed by the caller,
* -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry *entry)
+ const struct nf_hook_entries *e, unsigned int s)
{
unsigned int verdict;
int ret;
- do {
- verdict = nf_hook_entry_hookfn(entry, skb, state);
+ for (; s < e->num_hook_entries; s++) {
+ verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
- entry = rcu_dereference(entry->next);
break;
case NF_DROP:
kfree_skb(skb);
@@ -248,8 +475,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
ret = -EPERM;
return ret;
case NF_QUEUE:
- ret = nf_queue(skb, state, &entry, verdict);
- if (ret == 1 && entry)
+ ret = nf_queue(skb, state, e, s, verdict);
+ if (ret == 1)
continue;
return ret;
default:
@@ -258,7 +485,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
*/
return 0;
}
- } while (entry);
+ }
return 1;
}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e31956b58aba..5cb7cac9177d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -125,14 +125,12 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.inpkts++;
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(&s->syncp);
@@ -159,14 +157,12 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.outpkts++;
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(&s->syncp);
@@ -1222,7 +1218,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
if (!pptr)
return NULL;
- rcu_read_lock();
dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
&iph->saddr, pptr[0]);
if (dest) {
@@ -1237,7 +1232,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
pptr[0], pptr[1]);
}
}
- rcu_read_unlock();
return cp;
}
@@ -1689,11 +1683,9 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
if (dest) {
struct ip_vs_dest_dst *dest_dst;
- rcu_read_lock();
dest_dst = rcu_dereference(dest->dest_dst);
if (dest_dst)
mtu = dst_mtu(dest_dst->dst_cache);
- rcu_read_unlock();
}
if (mtu > 68 + sizeof(struct iphdr))
mtu -= sizeof(struct iphdr);
@@ -2109,7 +2101,7 @@ ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
#endif
-static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
+static const struct nf_hook_ops ip_vs_ops[] = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 1fa3c2307b6e..4f940d7eb2f7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -550,18 +550,15 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
/* Check for "full" addressed entries */
hash = ip_vs_rs_hashkey(af, daddr, dport);
- rcu_read_lock();
hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
if (dest->port == dport &&
dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) &&
(dest->protocol == protocol || dest->vfwmark)) {
/* HIT */
- rcu_read_unlock();
return true;
}
}
- rcu_read_unlock();
return false;
}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index fb780be76d15..3e17d32b629d 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -269,13 +269,11 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* hopefully it will succeed on the retransmitted
* packet.
*/
- rcu_read_lock();
mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
iph->ihl * 4,
start - data,
end - start,
buf, buf_len);
- rcu_read_unlock();
if (mangled) {
ip_vs_nfct_expect_related(skb, ct, n_cp,
IPPROTO_TCP, 0, 0);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 3ffad4adaddf..e1efa446b305 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -38,7 +38,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 0;
}
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
&iph->daddr, ports[1]);
@@ -53,7 +52,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -67,11 +65,9 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -526,12 +522,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = sctp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -544,11 +538,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
-out:
+
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 12dc8d5bc37d..121a321b91be 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -63,7 +63,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
}
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
@@ -80,7 +79,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -95,11 +93,9 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -661,12 +657,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = tcp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -680,12 +674,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
- out:
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index e494e9a88c7f..30e11cd6aa8a 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -53,7 +53,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 0;
}
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
&iph->daddr, ports[1]);
@@ -69,7 +68,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -84,11 +82,9 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -410,12 +406,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = udp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -429,12 +423,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
- out:
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 2eab1e0400f4..90d396814798 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -678,7 +678,6 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr,
IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
@@ -689,14 +688,12 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -710,7 +707,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL,
&iph->daddr, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
@@ -720,14 +716,12 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -746,7 +740,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
__be16 _pt, *p;
@@ -815,14 +808,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
- rcu_read_unlock();
LeaveFunction(10);
return rc;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -837,7 +828,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
__be16 _pt, *p;
@@ -906,7 +896,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
- rcu_read_unlock();
LeaveFunction(10);
return rc;
@@ -914,7 +903,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
LeaveFunction(10);
kfree_skb(skb);
- rcu_read_unlock();
return NF_STOLEN;
}
#endif
@@ -1035,7 +1023,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -1043,10 +1030,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
- }
rt = skb_rtable(skb);
tdev = rt->dst.dev;
@@ -1095,7 +1080,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_local_out(net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
@@ -1104,7 +1088,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
if (!IS_ERR(skb))
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1127,7 +1110,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
&saddr, ipvsh, 1,
@@ -1136,10 +1118,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_TUNNEL);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- }
rt = (struct rt6_info *) skb_dst(skb);
tdev = rt->dst.dev;
@@ -1185,7 +1165,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
ip6_local_out(cp->ipvs->net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
@@ -1194,7 +1173,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
if (!IS_ERR(skb))
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1213,17 +1191,14 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
- }
ip_send_check(ip_hdr(skb));
@@ -1231,14 +1206,12 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1252,7 +1225,6 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
NULL, ipvsh, 0,
@@ -1261,23 +1233,19 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_KNOWN_NH);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- }
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1322,7 +1290,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- rcu_read_lock();
local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
NULL, iph);
if (local < 0)
@@ -1368,12 +1335,10 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
- rcu_read_unlock();
goto out;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
@@ -1414,7 +1379,6 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
if (local < 0)
@@ -1460,12 +1424,10 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
- rcu_read_unlock();
goto out;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 4e99cca61612..ecc3ab784633 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -40,7 +40,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
goto out;
- rcu_read_lock();
in_dev = __in_dev_get_rcu(rt->dst.dev);
if (in_dev != NULL) {
for_primary_ifa(in_dev) {
@@ -50,7 +49,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
}
} endfor_ifa(in_dev);
}
- rcu_read_unlock();
if (mask == 0)
goto out;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 51390febd5e3..01130392b7c0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -56,6 +56,8 @@
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
+#include "nf_internals.h"
+
#define NF_CONNTRACK_VERSION "0.5.0"
int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
@@ -254,8 +256,8 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
u_int16_t l3num,
struct net *net, struct nf_conntrack_tuple *tuple)
{
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
unsigned int protoff;
u_int8_t protonum;
int ret;
@@ -404,22 +406,19 @@ static void
destroy_conntrack(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
pr_debug("destroy_conntrack(%p)\n", ct);
- NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
+ WARN_ON(atomic_read(&nfct->use) != 0);
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
return;
}
- rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->destroy)
l4proto->destroy(ct);
- rcu_read_unlock();
-
local_bh_disable();
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
@@ -701,7 +700,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
{
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
@@ -763,12 +762,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
* connections for unconfirmed conns. But packet copies and
* REJECT will give spurious warnings here.
*/
- /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
/* No external references means no one else could have
* confirmed us.
*/
- NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ WARN_ON(nf_ct_is_confirmed(ct));
pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
* a race against nf_ct_get_next_corpse() possibly called from
@@ -1090,7 +1088,7 @@ static void gc_worker(struct work_struct *work)
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
- INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}
@@ -1167,7 +1165,7 @@ void nf_conntrack_free(struct nf_conn *ct)
/* A freed object has refcnt == 0, that's
* the golden rule for SLAB_TYPESAFE_BY_RCU
*/
- NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
+ WARN_ON(atomic_read(&ct->ct_general.use) != 0);
nf_ct_ext_destroy(ct);
nf_ct_ext_free(ct);
@@ -1183,8 +1181,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto,
+ const struct nf_conntrack_l3proto *l3proto,
+ const struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
unsigned int dataoff, u32 hash)
{
@@ -1295,8 +1293,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
- struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l3proto *l3proto,
+ const struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
@@ -1351,10 +1349,10 @@ unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conn *ct, *tmpl;
enum ip_conntrack_info ctinfo;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
unsigned int *timeouts;
unsigned int dataoff;
u_int8_t protonum;
@@ -1421,7 +1419,7 @@ repeat:
/* Decide what timeout policy we want to apply to this flow. */
timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
- ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
+ ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, timeouts);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
@@ -1475,7 +1473,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
struct nf_conn_help *help = nfct_help(ct);
/* Should be unconfirmed, so not in hash table yet */
- NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ WARN_ON(nf_ct_is_confirmed(ct));
pr_debug("Altering reply tuple of %p to ", ct);
nf_ct_dump_tuple(newreply);
@@ -1497,7 +1495,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
unsigned long extra_jiffies,
int do_acct)
{
- NF_CT_ASSERT(skb);
+ WARN_ON(!skb);
/* Only update if this is not a fixed timeout */
if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
@@ -1695,6 +1693,18 @@ __nf_ct_unconfirmed_destroy(struct net *net)
}
}
+void nf_ct_unconfirmed_destroy(struct net *net)
+{
+ might_sleep();
+
+ if (atomic_read(&net->ct.count) > 0) {
+ __nf_ct_unconfirmed_destroy(net);
+ nf_queue_nf_hook_drop(net);
+ synchronize_net();
+ }
+}
+EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy);
+
void nf_ct_iterate_cleanup_net(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report)
@@ -1706,14 +1716,10 @@ void nf_ct_iterate_cleanup_net(struct net *net,
if (atomic_read(&net->ct.count) == 0)
return;
- __nf_ct_unconfirmed_destroy(net);
-
d.iter = iter;
d.data = data;
d.net = net;
- synchronize_net();
-
nf_ct_iterate_cleanup(iter_net_only, &d, portid, report);
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
@@ -1739,6 +1745,7 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
if (atomic_read(&net->ct.count) == 0)
continue;
__nf_ct_unconfirmed_destroy(net);
+ nf_queue_nf_hook_drop(net);
}
rtnl_unlock();
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 899c2c36da13..64778f9a8548 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -51,8 +51,8 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
struct nf_conn_help *master_help = nfct_help(exp->master);
struct net *net = nf_ct_exp_net(exp);
- NF_CT_ASSERT(master_help);
- NF_CT_ASSERT(!timer_pending(&exp->timeout));
+ WARN_ON(!master_help);
+ WARN_ON(timer_pending(&exp->timeout));
hlist_del_rcu(&exp->hnode);
net->ct.expect_count--;
@@ -368,12 +368,6 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
/* two references : one for hash insert, one for the timer */
refcount_add(2, &exp->use);
- hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
- master_help->expecting[exp->class]++;
-
- hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
- net->ct.expect_count++;
-
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
helper = rcu_dereference_protected(master_help->helper,
@@ -384,6 +378,12 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
}
add_timer(&exp->timeout);
+ hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
+ master_help->expecting[exp->class]++;
+
+ hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
+ net->ct.expect_count++;
+
NF_CT_STAT_INC(net, expect_create);
}
@@ -474,6 +474,60 @@ out:
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
+void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data),
+ void *data)
+{
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *next;
+ unsigned int i;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, next,
+ &nf_ct_expect_hash[i],
+ hnode) {
+ if (iter(exp, data) && del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy);
+
+void nf_ct_expect_iterate_net(struct net *net,
+ bool (*iter)(struct nf_conntrack_expect *e, void *data),
+ void *data,
+ u32 portid, int report)
+{
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *next;
+ unsigned int i;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, next,
+ &nf_ct_expect_hash[i],
+ hnode) {
+
+ if (!net_eq(nf_ct_exp_net(exp), net))
+ continue;
+
+ if (iter(exp, data) && del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect_report(exp, portid, report);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net);
+
#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct ct_expect_iter_state {
struct seq_net_private p;
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 6c605e88ebae..9fe0ddc333fb 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -47,7 +47,7 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
struct nf_ct_ext_type *t;
/* Conntrack must not be confirmed to avoid races on reallocation. */
- NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ WARN_ON(nf_ct_is_confirmed(ct));
old = ct->ext;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 9129bb3b5153..551a1eddf0fa 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -437,12 +437,22 @@ out:
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
{
- struct nf_conntrack_expect *exp;
- const struct hlist_node *next;
- unsigned int i;
+ struct nf_conn_help *help = nfct_help(exp->master);
+ const struct nf_conntrack_helper *me = data;
+ const struct nf_conntrack_helper *this;
+
+ if (exp->helper == me)
+ return true;
+ this = rcu_dereference_protected(help->helper,
+ lockdep_is_held(&nf_conntrack_expect_lock));
+ return this == me;
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
mutex_lock(&nf_ct_helper_mutex);
hlist_del_rcu(&me->hnode);
nf_ct_helper_count--;
@@ -453,21 +463,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
*/
synchronize_rcu();
- /* Get rid of expectations */
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i], hnode) {
- struct nf_conn_help *help = nfct_help(exp->master);
- if ((rcu_dereference_protected(
- help->helper,
- lockdep_is_held(&nf_conntrack_expect_lock)
- ) == me || exp->helper == me))
- nf_ct_remove_expect(exp);
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
-
+ nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
nf_ct_iterate_destroy(unhelp, me);
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index cf9ace70bece..397e6911214f 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -49,11 +49,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-static void generic_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
-}
-
static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
@@ -64,10 +59,8 @@ static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
.l3proto = PF_UNSPEC,
- .name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
- .print_tuple = generic_print_tuple,
.get_l4proto = generic_get_l4proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7999e70c3bfb..de4053d84364 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -61,8 +61,8 @@ MODULE_LICENSE("GPL");
static char __initdata version[] = "0.93";
static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
- const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
struct nlattr *nest_parms;
@@ -86,7 +86,7 @@ nla_put_failure:
static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l3proto *l3proto)
+ const struct nf_conntrack_l3proto *l3proto)
{
int ret = 0;
struct nlattr *nest_parms;
@@ -109,9 +109,9 @@ nla_put_failure:
static int ctnetlink_dump_tuples(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
int ret;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
@@ -163,7 +163,7 @@ nla_put_failure:
static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
{
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *nest_proto;
int ret;
@@ -535,17 +535,16 @@ nla_put_failure:
static inline size_t ctnetlink_proto_size(const struct nf_conn *ct)
{
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
- size_t len = 0;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
+ size_t len;
- rcu_read_lock();
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
- len += l3proto->nla_size;
+ len = l3proto->nla_size;
+ len *= 3u; /* ORIG, REPLY, MASTER */
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
len += l4proto->nla_size;
- rcu_read_unlock();
return len;
}
@@ -664,7 +663,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
- rcu_read_lock();
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
@@ -736,8 +734,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
&& ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
- rcu_read_unlock();
-
nlmsg_end(skb, nlh);
err = nfnetlink_send(skb, net, item->portid, group, item->report,
GFP_ATOMIC);
@@ -747,7 +743,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
return 0;
nla_put_failure:
- rcu_read_unlock();
nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
@@ -941,8 +936,8 @@ static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = {
static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
struct nf_conntrack_tuple *tuple)
{
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *tb[CTA_PROTO_MAX+1];
- struct nf_conntrack_l4proto *l4proto;
int ret = 0;
ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy,
@@ -1585,8 +1580,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct,
const struct nlattr * const cda[])
{
const struct nlattr *attr = cda[CTA_PROTOINFO];
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *tb[CTA_PROTOINFO_MAX+1];
- struct nf_conntrack_l4proto *l4proto;
int err = 0;
err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy,
@@ -2213,7 +2208,6 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
const struct nf_conntrack_zone *zone;
struct nlattr *nest_parms;
- rcu_read_lock();
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
@@ -2272,11 +2266,9 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
#endif
if (ctnetlink_dump_labels(skb, ct) < 0)
goto nla_put_failure;
- rcu_read_unlock();
return 0;
nla_put_failure:
- rcu_read_unlock();
return -ENOSPC;
}
@@ -2483,11 +2475,11 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple_mask *mask)
{
- int ret;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple m;
struct nlattr *nest_parms;
+ int ret;
memset(&m, 0xFF, sizeof(m));
memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3));
@@ -2661,17 +2653,14 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
- rcu_read_lock();
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nla_put_failure;
- rcu_read_unlock();
nlmsg_end(skb, nlh);
nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC);
return 0;
nla_put_failure:
- rcu_read_unlock();
nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
@@ -2910,6 +2899,21 @@ out:
return err == -EAGAIN ? -ENOBUFS : err;
}
+static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
+{
+ const struct nf_conn_help *m_help;
+ const char *name = data;
+
+ m_help = nfct_help(exp->master);
+
+ return strcmp(m_help->helper->name, name) == 0;
+}
+
+static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data)
+{
+ return true;
+}
+
static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const cda[],
@@ -2918,10 +2922,8 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- struct hlist_node *next;
u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_zone zone;
- unsigned int i;
int err;
if (cda[CTA_EXPECT_TUPLE]) {
@@ -2961,49 +2963,15 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
nf_ct_expect_put(exp);
} else if (cda[CTA_EXPECT_HELP_NAME]) {
char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]);
- struct nf_conn_help *m_help;
-
- /* delete all expectations for this helper */
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i],
- hnode) {
-
- if (!net_eq(nf_ct_exp_net(exp), net))
- continue;
- m_help = nfct_help(exp->master);
- if (!strcmp(m_help->helper->name, name) &&
- del_timer(&exp->timeout)) {
- nf_ct_unlink_expect_report(exp,
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
- nf_ct_expect_put(exp);
- }
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_iterate_net(net, expect_iter_name, name,
+ NETLINK_CB(skb).portid,
+ nlmsg_report(nlh));
} else {
/* This basically means we have to flush everything*/
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i],
- hnode) {
-
- if (!net_eq(nf_ct_exp_net(exp), net))
- continue;
-
- if (del_timer(&exp->timeout)) {
- nf_ct_unlink_expect_report(exp,
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
- nf_ct_expect_put(exp);
- }
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_iterate_net(net, expect_iter_all, NULL,
+ NETLINK_CB(skb).portid,
+ nlmsg_report(nlh));
}
return 0;
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 6959e93063d4..11562f2a08bb 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -113,7 +113,6 @@ static void pptp_expectfn(struct nf_conn *ct,
/* Can you see how rusty this code is, compared with the pre-2.6.11
* one? That's what happened to my shiny newnat of 2002 ;( -HW */
- rcu_read_lock();
nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn);
if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK)
nf_nat_pptp_expectfn(ct, exp);
@@ -136,7 +135,6 @@ static void pptp_expectfn(struct nf_conn *ct,
pr_debug("not found\n");
}
}
- rcu_read_unlock();
}
static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 1dcad229c3cc..b3e489c859ec 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -65,7 +65,7 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
}
#endif
-struct nf_conntrack_l4proto *
+const struct nf_conntrack_l4proto *
__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
{
if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
@@ -77,7 +77,7 @@ EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
/* this is guaranteed to always return a valid protocol helper, since
* it falls back to generic_protocol */
-struct nf_conntrack_l3proto *
+const struct nf_conntrack_l3proto *
nf_ct_l3proto_find_get(u_int16_t l3proto)
{
struct nf_conntrack_l3proto *p;
@@ -95,8 +95,8 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
int
nf_ct_l3proto_try_module_get(unsigned short l3proto)
{
+ const struct nf_conntrack_l3proto *p;
int ret;
- struct nf_conntrack_l3proto *p;
retry: p = nf_ct_l3proto_find_get(l3proto);
if (p == &nf_conntrack_l3proto_generic) {
@@ -173,10 +173,10 @@ void nf_ct_netns_put(struct net *net, u8 nfproto)
}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
-struct nf_conntrack_l4proto *
+const struct nf_conntrack_l4proto *
nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
{
- struct nf_conntrack_l4proto *p;
+ const struct nf_conntrack_l4proto *p;
rcu_read_lock();
p = __nf_ct_l4proto_find(l3num, l4num);
@@ -188,7 +188,7 @@ nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
-void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p)
+void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
{
module_put(p->me);
}
@@ -196,28 +196,28 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
static int kill_l3proto(struct nf_conn *i, void *data)
{
- return nf_ct_l3num(i) == ((struct nf_conntrack_l3proto *)data)->l3proto;
+ return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto;
}
static int kill_l4proto(struct nf_conn *i, void *data)
{
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
l4proto = data;
return nf_ct_protonum(i) == l4proto->l4proto &&
nf_ct_l3num(i) == l4proto->l3proto;
}
-int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
+int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto)
{
int ret = 0;
struct nf_conntrack_l3proto *old;
if (proto->l3proto >= NFPROTO_NUMPROTO)
return -EBUSY;
-
- if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ if (proto->tuple_to_nlattr && proto->nla_size == 0)
return -EINVAL;
-
+#endif
mutex_lock(&nf_ct_proto_mutex);
old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
lockdep_is_held(&nf_ct_proto_mutex));
@@ -226,9 +226,6 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
goto out_unlock;
}
- if (proto->nlattr_tuple_size)
- proto->nla_size = 3 * proto->nlattr_tuple_size();
-
rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
out_unlock:
@@ -238,21 +235,7 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
-#ifdef CONFIG_SYSCTL
-extern unsigned int nf_conntrack_default_on;
-
-int nf_ct_l3proto_pernet_register(struct net *net,
- struct nf_conntrack_l3proto *proto)
-{
- if (nf_conntrack_default_on == 0)
- return 0;
-
- return proto->net_ns_get ? proto->net_ns_get(net) : 0;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
-#endif
-
-void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
+void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto)
{
BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
@@ -266,27 +249,12 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
synchronize_rcu();
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_destroy(kill_l3proto, proto);
+ nf_ct_iterate_destroy(kill_l3proto, (void*)proto);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
-void nf_ct_l3proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l3proto *proto)
-{
- /*
- * nf_conntrack_default_on *might* have registered hooks.
- * ->net_ns_put must cope with more puts() than get(), i.e.
- * if nf_conntrack_default_on was 0 at time of
- * nf_ct_l3proto_pernet_register invocation this net_ns_put()
- * should be a noop.
- */
- if (proto->net_ns_put)
- proto->net_ns_put(net);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
-
static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
if (l4proto->get_net_proto) {
/* statically built-in protocols use static per-net */
@@ -301,7 +269,7 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
static
int nf_ct_l4proto_register_sysctl(struct net *net,
struct nf_proto_net *pn,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
int err = 0;
@@ -324,8 +292,8 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
static
void nf_ct_l4proto_unregister_sysctl(struct net *net,
- struct nf_proto_net *pn,
- struct nf_conntrack_l4proto *l4proto)
+ struct nf_proto_net *pn,
+ const struct nf_conntrack_l4proto *l4proto)
{
#ifdef CONFIG_SYSCTL
if (pn->ctl_table_header != NULL)
@@ -395,7 +363,7 @@ out_unlock:
EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
int nf_ct_l4proto_pernet_register_one(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
struct nf_proto_net *pn = NULL;
@@ -420,7 +388,7 @@ out:
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
-static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
{
BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));
@@ -433,7 +401,7 @@ static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
&nf_conntrack_l4proto_generic);
}
-void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
{
mutex_lock(&nf_ct_proto_mutex);
__nf_ct_l4proto_unregister_one(l4proto);
@@ -444,7 +412,7 @@ void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
void nf_ct_l4proto_pernet_unregister_one(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
@@ -469,8 +437,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
}
if (i != num_proto) {
ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
- pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n",
- ver, l4proto[i]->name, ver);
+ pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n",
+ ver, l4proto[i]->l4proto);
nf_ct_l4proto_unregister(l4proto, i);
}
return ret;
@@ -478,7 +446,7 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
int nf_ct_l4proto_pernet_register(struct net *net,
- struct nf_conntrack_l4proto *l4proto[],
+ struct nf_conntrack_l4proto *const l4proto[],
unsigned int num_proto)
{
int ret = -EINVAL;
@@ -490,8 +458,8 @@ int nf_ct_l4proto_pernet_register(struct net *net,
break;
}
if (i != num_proto) {
- pr_err("nf_conntrack_%s%d: pernet registration failed\n",
- l4proto[i]->name,
+ pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n",
+ l4proto[i]->l4proto,
l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
nf_ct_l4proto_pernet_unregister(net, l4proto, i);
}
@@ -514,8 +482,8 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
void nf_ct_l4proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l4proto *l4proto[],
- unsigned int num_proto)
+ struct nf_conntrack_l4proto *const l4proto[],
+ unsigned int num_proto)
{
while (num_proto-- != 0)
nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 4707d997558a..0f5a4d79f6b8 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -469,7 +469,7 @@ static unsigned int *dccp_get_timeouts(struct net *net)
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, enum ip_conntrack_info ctinfo,
- u_int8_t pf, unsigned int hooknum,
+ u_int8_t pf,
unsigned int *timeouts)
{
struct net *net = nf_ct_net(ct);
@@ -623,18 +623,12 @@ static bool dccp_can_early_drop(const struct nf_conn *ct)
return false;
}
-static void dccp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.dccp.port),
- ntohs(tuple->dst.u.dccp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
}
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
@@ -880,7 +874,6 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.l3proto = AF_INET,
.l4proto = IPPROTO_DCCP,
- .name = "dccp",
.pkt_to_tuple = dccp_pkt_to_tuple,
.invert_tuple = dccp_invert_tuple,
.new = dccp_new,
@@ -888,8 +881,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
- .print_tuple = dccp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = dccp_print_conntrack,
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = dccp_to_nlattr,
.nlattr_size = dccp_nlattr_size,
@@ -916,7 +910,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.l3proto = AF_INET6,
.l4proto = IPPROTO_DCCP,
- .name = "dccp",
.pkt_to_tuple = dccp_pkt_to_tuple,
.invert_tuple = dccp_invert_tuple,
.new = dccp_new,
@@ -924,8 +917,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
- .print_tuple = dccp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = dccp_print_conntrack,
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = dccp_to_nlattr,
.nlattr_size = dccp_nlattr_size,
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index d5868bad33a7..9cd40700842e 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -17,22 +17,10 @@ static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
static bool nf_generic_should_process(u8 proto)
{
switch (proto) {
-#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE
- case IPPROTO_SCTP:
- return false;
-#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE
- case IPPROTO_DCCP:
- return false;
-#endif
#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
case IPPROTO_GRE:
return false;
#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE
- case IPPROTO_UDPLITE:
- return false;
-#endif
default:
return true;
}
@@ -62,12 +50,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void generic_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
-}
-
static unsigned int *generic_get_timeouts(struct net *net)
{
return &(generic_pernet(net)->timeout);
@@ -79,7 +61,6 @@ static int generic_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeout)
{
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
@@ -187,10 +168,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
{
.l3proto = PF_UNSPEC,
.l4proto = 255,
- .name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
- .print_tuple = generic_print_tuple,
.packet = generic_packet,
.get_timeouts = generic_get_timeouts,
.new = generic_new,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 87bb40a3feb5..09a90484c27d 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -224,15 +224,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
return true;
}
-/* print gre part of tuple */
-static void gre_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "srckey=0x%x dstkey=0x%x ",
- ntohs(tuple->src.u.gre.key),
- ntohs(tuple->dst.u.gre.key));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* print private data for conntrack */
static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
@@ -240,6 +232,7 @@ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
(ct->proto.gre.timeout / HZ),
(ct->proto.gre.stream_timeout / HZ));
}
+#endif
static unsigned int *gre_get_timeouts(struct net *net)
{
@@ -252,7 +245,6 @@ static int gre_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
/* If we've seen traffic both ways, this is a GRE connection.
@@ -364,11 +356,11 @@ static int gre_init_net(struct net *net, u_int16_t proto)
static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
.l3proto = AF_INET,
.l4proto = IPPROTO_GRE,
- .name = "gre",
.pkt_to_tuple = gre_pkt_to_tuple,
.invert_tuple = gre_invert_tuple,
- .print_tuple = gre_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
+#endif
.get_timeouts = gre_get_timeouts,
.packet = gre_packet,
.new = gre_new,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 6eef29d2eec4..6303a88af12b 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -174,20 +174,13 @@ static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void sctp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.sctp.port),
- ntohs(tuple->dst.u.sctp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]);
}
+#endif
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
@@ -314,7 +307,6 @@ static int sctp_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
enum sctp_conntrack new_state, old_state;
@@ -791,11 +783,11 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.l3proto = PF_INET,
.l4proto = IPPROTO_SCTP,
- .name = "sctp",
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
+#endif
.packet = sctp_packet,
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
@@ -828,11 +820,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.l3proto = PF_INET6,
.l4proto = IPPROTO_SCTP,
- .name = "sctp",
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
+#endif
.packet = sctp_packet,
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 9758a7dfd83e..cba1c6ffe51a 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -301,20 +301,13 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void tcp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.tcp.port),
- ntohs(tuple->dst.u.tcp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
}
+#endif
static unsigned int get_conntrack_index(const struct tcphdr *tcph)
{
@@ -810,7 +803,6 @@ static int tcp_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
struct net *net = nf_ct_net(ct);
@@ -1556,11 +1548,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_TCP,
- .name = "tcp",
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
- .print_tuple = tcp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
+#endif
.packet = tcp_packet,
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
@@ -1594,11 +1586,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_TCP,
- .name = "tcp",
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
- .print_tuple = tcp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
+#endif
.packet = tcp_packet,
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index f6ebce6178ca..8af734cd1a94 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -63,15 +63,6 @@ static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void udp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.udp.port),
- ntohs(tuple->dst.u.udp.port));
-}
-
static unsigned int *udp_get_timeouts(struct net *net)
{
return udp_pernet(net)->timeouts;
@@ -83,7 +74,6 @@ static int udp_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
/* If we've seen traffic both ways, this is some kind of UDP
@@ -313,11 +303,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDP,
- .name = "udp",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -347,11 +335,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDPLITE,
- .name = "udplite",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -381,11 +367,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDP,
- .name = "udp",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -415,11 +399,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDPLITE,
- .name = "udplite",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index d38af4274335..4dbb5bad4363 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -884,7 +884,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
tuple.dst.u3 = *daddr;
tuple.dst.u.udp.port = port;
- rcu_read_lock();
do {
exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
@@ -918,10 +917,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
goto err1;
}
- if (skip_expect) {
- rcu_read_unlock();
+ if (skip_expect)
return NF_ACCEPT;
- }
rtp_exp = nf_ct_expect_alloc(ct);
if (rtp_exp == NULL)
@@ -952,7 +949,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
err2:
nf_ct_expect_put(rtp_exp);
err1:
- rcu_read_unlock();
return ret;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index ccb5cb9043e0..5a101caa3e12 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -41,8 +41,62 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
- l3proto->print_tuple(s, tuple);
- l4proto->print_tuple(s, tuple);
+ switch (l3proto->l3proto) {
+ case NFPROTO_IPV4:
+ seq_printf(s, "src=%pI4 dst=%pI4 ",
+ &tuple->src.u3.ip, &tuple->dst.u3.ip);
+ break;
+ case NFPROTO_IPV6:
+ seq_printf(s, "src=%pI6 dst=%pI6 ",
+ tuple->src.u3.ip6, tuple->dst.u3.ip6);
+ break;
+ default:
+ break;
+ }
+
+ switch (l4proto->l4proto) {
+ case IPPROTO_ICMP:
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+ break;
+ case IPPROTO_TCP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.tcp.port),
+ ntohs(tuple->dst.u.tcp.port));
+ break;
+ case IPPROTO_UDPLITE: /* fallthrough */
+ case IPPROTO_UDP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.udp.port),
+ ntohs(tuple->dst.u.udp.port));
+
+ break;
+ case IPPROTO_DCCP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.dccp.port),
+ ntohs(tuple->dst.u.dccp.port));
+ break;
+ case IPPROTO_SCTP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.sctp.port),
+ ntohs(tuple->dst.u.sctp.port));
+ break;
+ case IPPROTO_ICMPV6:
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+ break;
+ case IPPROTO_GRE:
+ seq_printf(s, "srckey=0x%x dstkey=0x%x ",
+ ntohs(tuple->src.u.gre.key),
+ ntohs(tuple->dst.u.gre.key));
+ break;
+ default:
+ break;
+ }
}
EXPORT_SYMBOL_GPL(print_tuple);
@@ -198,6 +252,31 @@ ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
}
#endif
+static const char* l3proto_name(u16 proto)
+{
+ switch (proto) {
+ case AF_INET: return "ipv4";
+ case AF_INET6: return "ipv6";
+ }
+
+ return "unknown";
+}
+
+static const char* l4proto_name(u16 proto)
+{
+ switch (proto) {
+ case IPPROTO_ICMP: return "icmp";
+ case IPPROTO_TCP: return "tcp";
+ case IPPROTO_UDP: return "udp";
+ case IPPROTO_DCCP: return "dccp";
+ case IPPROTO_GRE: return "gre";
+ case IPPROTO_SCTP: return "sctp";
+ case IPPROTO_UDPLITE: return "udplite";
+ }
+
+ return "unknown";
+}
+
/* return 0 on success, 1 in case of error */
static int ct_seq_show(struct seq_file *s, void *v)
{
@@ -208,7 +287,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
struct net *net = seq_file_net(s);
int ret = 0;
- NF_CT_ASSERT(ct);
+ WARN_ON(!ct);
if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
return 0;
@@ -225,14 +304,14 @@ static int ct_seq_show(struct seq_file *s, void *v)
goto release;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
- NF_CT_ASSERT(l3proto);
+ WARN_ON(!l3proto);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
- NF_CT_ASSERT(l4proto);
+ WARN_ON(!l4proto);
ret = -ENOSPC;
seq_printf(s, "%-8s %u %-8s %u %ld ",
- l3proto->name, nf_ct_l3num(ct),
- l4proto->name, nf_ct_protonum(ct),
+ l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
+ l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
nf_ct_expires(ct) / HZ);
if (l4proto->print_conntrack)
@@ -452,9 +531,6 @@ static int log_invalid_proto_max __read_mostly = 255;
/* size the user *wants to set */
static unsigned int nf_conntrack_htable_size_user __read_mostly;
-extern unsigned int nf_conntrack_default_on;
-unsigned int nf_conntrack_default_on __read_mostly = 1;
-
static int
nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -520,13 +596,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "nf_conntrack_default_on",
- .data = &nf_conntrack_default_on,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
{ }
};
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index bfa742da83af..49f87ec093a3 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -5,17 +5,11 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
-#ifdef CONFIG_NETFILTER_DEBUG
-#define NFDEBUG(format, args...) printk(KERN_DEBUG format , ## args)
-#else
-#define NFDEBUG(format, args...)
-#endif
-
/* nf_queue.c */
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry **entryp, unsigned int verdict);
+ const struct nf_hook_entries *entries, unsigned int index,
+ unsigned int verdict);
unsigned int nf_queue_nf_hook_drop(struct net *net);
-int __init netfilter_queue_init(void);
/* nf_log.c */
int __init netfilter_log_init(void);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index b1d3740ae36a..40573aa6c133 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -414,8 +414,8 @@ nf_nat_setup_info(struct nf_conn *ct,
if (nf_ct_is_confirmed(ct))
return NF_ACCEPT;
- NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
- maniptype == NF_NAT_MANIP_DST);
+ WARN_ON(maniptype != NF_NAT_MANIP_SRC &&
+ maniptype != NF_NAT_MANIP_DST);
BUG_ON(nf_nat_initialized(ct, maniptype));
/* What we've got will look like inverse of reply. Normally
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 86067560a318..25b06b959118 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -38,11 +38,11 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
__be32 newdst;
struct nf_nat_range newrange;
- NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING ||
- hooknum == NF_INET_LOCAL_OUT);
+ WARN_ON(hooknum != NF_INET_PRE_ROUTING &&
+ hooknum != NF_INET_LOCAL_OUT);
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
/* Local packets: make them go to loopback */
if (hooknum == NF_INET_LOCAL_OUT) {
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 043850c9d154..f7e21953b1de 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -109,9 +109,11 @@ unsigned int nf_queue_nf_hook_drop(struct net *net)
return count;
}
+EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
- struct nf_hook_entry *hook_entry, unsigned int queuenum)
+ const struct nf_hook_entries *entries,
+ unsigned int index, unsigned int queuenum)
{
int status = -ENOENT;
struct nf_queue_entry *entry = NULL;
@@ -139,7 +141,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
*entry = (struct nf_queue_entry) {
.skb = skb,
.state = *state,
- .hook = hook_entry,
+ .hook_index = index,
.size = sizeof(*entry) + afinfo->route_key_size,
};
@@ -162,18 +164,16 @@ err:
/* Packets leaving via this function must come back through nf_reinject(). */
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry **entryp, unsigned int verdict)
+ const struct nf_hook_entries *entries, unsigned int index,
+ unsigned int verdict)
{
- struct nf_hook_entry *entry = *entryp;
int ret;
- ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS);
+ ret = __nf_queue(skb, state, entries, index, verdict >> NF_VERDICT_QBITS);
if (ret < 0) {
if (ret == -ESRCH &&
- (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) {
- *entryp = rcu_dereference(entry->next);
+ (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
return 1;
- }
kfree_skb(skb);
}
@@ -182,33 +182,56 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
static unsigned int nf_iterate(struct sk_buff *skb,
struct nf_hook_state *state,
- struct nf_hook_entry **entryp)
+ const struct nf_hook_entries *hooks,
+ unsigned int *index)
{
- unsigned int verdict;
+ const struct nf_hook_entry *hook;
+ unsigned int verdict, i = *index;
- do {
+ while (i < hooks->num_hook_entries) {
+ hook = &hooks->hooks[i];
repeat:
- verdict = nf_hook_entry_hookfn((*entryp), skb, state);
+ verdict = nf_hook_entry_hookfn(hook, skb, state);
if (verdict != NF_ACCEPT) {
if (verdict != NF_REPEAT)
return verdict;
goto repeat;
}
- *entryp = rcu_dereference((*entryp)->next);
- } while (*entryp);
+ i++;
+ }
+ *index = i;
return NF_ACCEPT;
}
+/* Caller must hold rcu read-side lock */
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
- struct nf_hook_entry *hook_entry = entry->hook;
+ const struct nf_hook_entry *hook_entry;
+ const struct nf_hook_entries *hooks;
struct sk_buff *skb = entry->skb;
const struct nf_afinfo *afinfo;
+ const struct net *net;
+ unsigned int i;
int err;
+ u8 pf;
+
+ net = entry->state.net;
+ pf = entry->state.pf;
+
+ hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]);
nf_queue_entry_release_refs(entry);
+ i = entry->hook_index;
+ if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) {
+ kfree_skb(skb);
+ kfree(entry);
+ return;
+ }
+
+ hook_entry = &hooks->hooks[i];
+
/* Continue traversal iff userspace said ok... */
if (verdict == NF_REPEAT)
verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
@@ -220,27 +243,22 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
}
if (verdict == NF_ACCEPT) {
- hook_entry = rcu_dereference(hook_entry->next);
- if (hook_entry)
next_hook:
- verdict = nf_iterate(skb, &entry->state, &hook_entry);
+ ++i;
+ verdict = nf_iterate(skb, &entry->state, hooks, &i);
}
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_STOP:
-okfn:
local_bh_disable();
entry->state.okfn(entry->state.net, entry->state.sk, skb);
local_bh_enable();
break;
case NF_QUEUE:
- err = nf_queue(skb, &entry->state, &hook_entry, verdict);
- if (err == 1) {
- if (hook_entry)
- goto next_hook;
- goto okfn;
- }
+ err = nf_queue(skb, &entry->state, hooks, i, verdict);
+ if (err == 1)
+ goto next_hook;
break;
case NF_STOLEN:
break;
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index c68c1e58b362..d2a9e6b5d01f 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -33,7 +33,7 @@ int nf_register_sockopt(struct nf_sockopt_ops *reg)
reg->set_optmin, reg->set_optmax)
|| overlap(ops->get_optmin, ops->get_optmax,
reg->get_optmin, reg->get_optmax))) {
- NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
+ pr_debug("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
ops->set_optmin, ops->set_optmax,
ops->get_optmin, ops->get_optmax,
reg->set_optmin, reg->set_optmax,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7843efa33c59..929927171426 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -726,7 +726,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (table == NULL)
goto err2;
- nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
+ table->name = nla_strdup(name, GFP_KERNEL);
+ if (table->name == NULL)
+ goto err3;
+
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
INIT_LIST_HEAD(&table->objects);
@@ -735,10 +738,12 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
- goto err3;
+ goto err4;
list_add_tail_rcu(&table->list, &afi->tables);
return 0;
+err4:
+ kfree(table->name);
err3:
kfree(table);
err2:
@@ -855,6 +860,10 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
if (IS_ERR(table))
return PTR_ERR(table);
+ if (nlh->nlmsg_flags & NLM_F_NONREC &&
+ table->use > 0)
+ return -EBUSY;
+
ctx.afi = afi;
ctx.table = table;
@@ -865,6 +874,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
{
BUG_ON(ctx->table->use > 0);
+ kfree(ctx->table->name);
kfree(ctx->table);
module_put(ctx->afi->owner);
}
@@ -1240,10 +1250,14 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
module_put(basechain->type->owner);
free_percpu(basechain->stats);
+ if (basechain->stats)
+ static_branch_dec(&nft_counters_enabled);
if (basechain->ops[0].dev != NULL)
dev_put(basechain->ops[0].dev);
+ kfree(chain->name);
kfree(basechain);
} else {
+ kfree(chain->name);
kfree(chain);
}
}
@@ -1325,155 +1339,18 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
dev_put(hook->dev);
}
-static int nf_tables_newchain(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ u8 policy, bool create)
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nlattr * uninitialized_var(name);
- struct nft_af_info *afi;
- struct nft_table *table;
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_table *table = ctx->table;
+ struct nft_af_info *afi = ctx->afi;
+ struct nft_base_chain *basechain;
+ struct nft_stats __percpu *stats;
+ struct net *net = ctx->net;
struct nft_chain *chain;
- struct nft_base_chain *basechain = NULL;
- u8 genmask = nft_genmask_next(net);
- int family = nfmsg->nfgen_family;
- u8 policy = NF_ACCEPT;
- u64 handle = 0;
unsigned int i;
- struct nft_stats __percpu *stats;
int err;
- bool create;
- struct nft_ctx ctx;
-
- create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
-
- afi = nf_tables_afinfo_lookup(net, family, true);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
- if (IS_ERR(table))
- return PTR_ERR(table);
-
- chain = NULL;
- name = nla[NFTA_CHAIN_NAME];
-
- if (nla[NFTA_CHAIN_HANDLE]) {
- handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
- chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
- if (IS_ERR(chain))
- return PTR_ERR(chain);
- } else {
- chain = nf_tables_chain_lookup(table, name, genmask);
- if (IS_ERR(chain)) {
- if (PTR_ERR(chain) != -ENOENT)
- return PTR_ERR(chain);
- chain = NULL;
- }
- }
-
- if (nla[NFTA_CHAIN_POLICY]) {
- if (chain != NULL &&
- !nft_is_base_chain(chain))
- return -EOPNOTSUPP;
-
- if (chain == NULL &&
- nla[NFTA_CHAIN_HOOK] == NULL)
- return -EOPNOTSUPP;
-
- policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
- switch (policy) {
- case NF_DROP:
- case NF_ACCEPT:
- break;
- default:
- return -EINVAL;
- }
- }
-
- if (chain != NULL) {
- struct nft_stats *stats = NULL;
- struct nft_trans *trans;
-
- if (nlh->nlmsg_flags & NLM_F_EXCL)
- return -EEXIST;
- if (nlh->nlmsg_flags & NLM_F_REPLACE)
- return -EOPNOTSUPP;
-
- if (nla[NFTA_CHAIN_HOOK]) {
- struct nft_base_chain *basechain;
- struct nft_chain_hook hook;
- struct nf_hook_ops *ops;
-
- if (!nft_is_base_chain(chain))
- return -EBUSY;
-
- err = nft_chain_parse_hook(net, nla, afi, &hook,
- create);
- if (err < 0)
- return err;
-
- basechain = nft_base_chain(chain);
- if (basechain->type != hook.type) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
- }
-
- for (i = 0; i < afi->nops; i++) {
- ops = &basechain->ops[i];
- if (ops->hooknum != hook.num ||
- ops->priority != hook.priority ||
- ops->dev != hook.dev) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
- }
- }
- nft_chain_release_hook(&hook);
- }
-
- if (nla[NFTA_CHAIN_HANDLE] && name) {
- struct nft_chain *chain2;
-
- chain2 = nf_tables_chain_lookup(table,
- nla[NFTA_CHAIN_NAME],
- genmask);
- if (IS_ERR(chain2))
- return PTR_ERR(chain2);
- }
-
- if (nla[NFTA_CHAIN_COUNTERS]) {
- if (!nft_is_base_chain(chain))
- return -EOPNOTSUPP;
-
- stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
- if (IS_ERR(stats))
- return PTR_ERR(stats);
- }
-
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
- trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
- sizeof(struct nft_trans_chain));
- if (trans == NULL) {
- free_percpu(stats);
- return -ENOMEM;
- }
-
- nft_trans_chain_stats(trans) = stats;
- nft_trans_chain_update(trans) = true;
-
- if (nla[NFTA_CHAIN_POLICY])
- nft_trans_chain_policy(trans) = policy;
- else
- nft_trans_chain_policy(trans) = -1;
-
- if (nla[NFTA_CHAIN_HANDLE] && name) {
- nla_strlcpy(nft_trans_chain_name(trans), name,
- NFT_CHAIN_MAXNAMELEN);
- }
- list_add_tail(&trans->list, &net->nft.commit_list);
- return 0;
- }
if (table->use == UINT_MAX)
return -EOVERFLOW;
@@ -1504,14 +1381,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
return PTR_ERR(stats);
}
basechain->stats = stats;
- } else {
- stats = netdev_alloc_pcpu_stats(struct nft_stats);
- if (stats == NULL) {
- nft_chain_release_hook(&hook);
- kfree(basechain);
- return -ENOMEM;
- }
- rcu_assign_pointer(basechain->stats, stats);
+ static_branch_inc(&nft_counters_enabled);
}
hookfn = hook.type->hooks[hook.num];
@@ -1539,31 +1409,204 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (chain == NULL)
return -ENOMEM;
}
-
INIT_LIST_HEAD(&chain->rules);
chain->handle = nf_tables_alloc_handle(table);
chain->table = table;
- nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+ chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+ if (!chain->name) {
+ err = -ENOMEM;
+ goto err1;
+ }
err = nf_tables_register_hooks(net, table, chain, afi->nops);
if (err < 0)
goto err1;
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
- err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
+ ctx->chain = chain;
+ err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (err < 0)
goto err2;
table->use++;
list_add_tail_rcu(&chain->list, &table->chains);
+
return 0;
err2:
nf_tables_unregister_hooks(net, table, chain, afi->nops);
err1:
nf_tables_chain_destroy(chain);
+
return err;
}
+static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
+ bool create)
+{
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_table *table = ctx->table;
+ struct nft_chain *chain = ctx->chain;
+ struct nft_af_info *afi = ctx->afi;
+ struct nft_base_chain *basechain;
+ struct nft_stats *stats = NULL;
+ struct nft_chain_hook hook;
+ const struct nlattr *name;
+ struct nf_hook_ops *ops;
+ struct nft_trans *trans;
+ int err, i;
+
+ if (nla[NFTA_CHAIN_HOOK]) {
+ if (!nft_is_base_chain(chain))
+ return -EBUSY;
+
+ err = nft_chain_parse_hook(ctx->net, nla, ctx->afi, &hook,
+ create);
+ if (err < 0)
+ return err;
+
+ basechain = nft_base_chain(chain);
+ if (basechain->type != hook.type) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+
+ for (i = 0; i < afi->nops; i++) {
+ ops = &basechain->ops[i];
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority ||
+ ops->dev != hook.dev) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+ }
+ nft_chain_release_hook(&hook);
+ }
+
+ if (nla[NFTA_CHAIN_HANDLE] &&
+ nla[NFTA_CHAIN_NAME]) {
+ struct nft_chain *chain2;
+
+ chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME],
+ genmask);
+ if (IS_ERR(chain2))
+ return PTR_ERR(chain2);
+ }
+
+ if (nla[NFTA_CHAIN_COUNTERS]) {
+ if (!nft_is_base_chain(chain))
+ return -EOPNOTSUPP;
+
+ stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+ if (IS_ERR(stats))
+ return PTR_ERR(stats);
+ }
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN,
+ sizeof(struct nft_trans_chain));
+ if (trans == NULL) {
+ free_percpu(stats);
+ return -ENOMEM;
+ }
+
+ nft_trans_chain_stats(trans) = stats;
+ nft_trans_chain_update(trans) = true;
+
+ if (nla[NFTA_CHAIN_POLICY])
+ nft_trans_chain_policy(trans) = policy;
+ else
+ nft_trans_chain_policy(trans) = -1;
+
+ name = nla[NFTA_CHAIN_NAME];
+ if (nla[NFTA_CHAIN_HANDLE] && name) {
+ nft_trans_chain_name(trans) =
+ nla_strdup(name, GFP_KERNEL);
+ if (!nft_trans_chain_name(trans)) {
+ kfree(trans);
+ free_percpu(stats);
+ return -ENOMEM;
+ }
+ }
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+}
+
+static int nf_tables_newchain(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nlattr * uninitialized_var(name);
+ u8 genmask = nft_genmask_next(net);
+ int family = nfmsg->nfgen_family;
+ struct nft_af_info *afi;
+ struct nft_table *table;
+ struct nft_chain *chain;
+ u8 policy = NF_ACCEPT;
+ struct nft_ctx ctx;
+ u64 handle = 0;
+ bool create;
+
+ create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+ afi = nf_tables_afinfo_lookup(net, family, true);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ chain = NULL;
+ name = nla[NFTA_CHAIN_NAME];
+
+ if (nla[NFTA_CHAIN_HANDLE]) {
+ handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+ chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+ } else {
+ chain = nf_tables_chain_lookup(table, name, genmask);
+ if (IS_ERR(chain)) {
+ if (PTR_ERR(chain) != -ENOENT)
+ return PTR_ERR(chain);
+ chain = NULL;
+ }
+ }
+
+ if (nla[NFTA_CHAIN_POLICY]) {
+ if (chain != NULL &&
+ !nft_is_base_chain(chain))
+ return -EOPNOTSUPP;
+
+ if (chain == NULL &&
+ nla[NFTA_CHAIN_HOOK] == NULL)
+ return -EOPNOTSUPP;
+
+ policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
+ switch (policy) {
+ case NF_DROP:
+ case NF_ACCEPT:
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+
+ if (chain != NULL) {
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+ if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
+ return nf_tables_updchain(&ctx, genmask, policy, create);
+ }
+
+ return nf_tables_addchain(&ctx, family, genmask, policy, create);
+}
+
static int nf_tables_delchain(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -1574,8 +1617,11 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
+ struct nft_rule *rule;
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
+ u32 use;
+ int err;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -1588,11 +1634,30 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
- if (chain->use > 0)
+
+ if (nlh->nlmsg_flags & NLM_F_NONREC &&
+ chain->use > 0)
return -EBUSY;
nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+ use = chain->use;
+ list_for_each_entry(rule, &chain->rules, list) {
+ if (!nft_is_active_next(net, rule))
+ continue;
+ use--;
+
+ err = nft_delrule(&ctx, rule);
+ if (err < 0)
+ return err;
+ }
+
+ /* There are rules and elements that are still holding references to us,
+ * we cannot do a recursive removal in this case.
+ */
+ if (use > 0)
+ return -EBUSY;
+
return nft_delchain(&ctx);
}
@@ -1977,8 +2042,8 @@ err:
}
struct nft_rule_dump_ctx {
- char table[NFT_TABLE_MAXNAMELEN];
- char chain[NFT_CHAIN_MAXNAMELEN];
+ char *table;
+ char *chain;
};
static int nf_tables_dump_rules(struct sk_buff *skb,
@@ -2002,7 +2067,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
continue;
list_for_each_entry_rcu(table, &afi->tables, list) {
- if (ctx && ctx->table[0] &&
+ if (ctx && ctx->table &&
strcmp(ctx->table, table->name) != 0)
continue;
@@ -2042,7 +2107,13 @@ done:
static int nf_tables_dump_rules_done(struct netlink_callback *cb)
{
- kfree(cb->data);
+ struct nft_rule_dump_ctx *ctx = cb->data;
+
+ if (ctx) {
+ kfree(ctx->table);
+ kfree(ctx->chain);
+ kfree(ctx);
+ }
return 0;
}
@@ -2074,12 +2145,23 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
if (!ctx)
return -ENOMEM;
- if (nla[NFTA_RULE_TABLE])
- nla_strlcpy(ctx->table, nla[NFTA_RULE_TABLE],
- sizeof(ctx->table));
- if (nla[NFTA_RULE_CHAIN])
- nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN],
- sizeof(ctx->chain));
+ if (nla[NFTA_RULE_TABLE]) {
+ ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
+ GFP_KERNEL);
+ if (!ctx->table) {
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
+ if (nla[NFTA_RULE_CHAIN]) {
+ ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
+ GFP_KERNEL);
+ if (!ctx->chain) {
+ kfree(ctx->table);
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
c.data = ctx;
}
@@ -2621,7 +2703,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
unsigned long *inuse;
unsigned int n = 0, min = 0;
- p = strnchr(name, NFT_SET_MAXNAMELEN, '%');
+ p = strchr(name, '%');
if (p != NULL) {
if (p[1] != 'd' || strchr(p + 2, '%'))
return -EINVAL;
@@ -2652,7 +2734,10 @@ cont:
free_page((unsigned long)inuse);
}
- snprintf(set->name, sizeof(set->name), name, min + n);
+ set->name = kasprintf(GFP_KERNEL, name, min + n);
+ if (!set->name)
+ return -ENOMEM;
+
list_for_each_entry(i, &ctx->table->sets, list) {
if (!nft_is_active_next(ctx->net, i))
continue;
@@ -2929,7 +3014,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
- char name[NFT_SET_MAXNAMELEN];
+ char *name;
unsigned int size;
bool create;
u64 timeout;
@@ -3075,8 +3160,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
goto err1;
}
- nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
+ name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
+ if (!name) {
+ err = -ENOMEM;
+ goto err2;
+ }
+
err = nf_tables_set_alloc_name(&ctx, set, name);
+ kfree(name);
if (err < 0)
goto err2;
@@ -3126,6 +3217,7 @@ static void nft_set_destroy(struct nft_set *set)
{
set->ops->destroy(set);
module_put(set->ops->type->owner);
+ kfree(set->name);
kvfree(set);
}
@@ -3159,7 +3251,9 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings))
+
+ if (!list_empty(&set->bindings) ||
+ (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0))
return -EBUSY;
return nft_delset(&ctx, set);
@@ -4209,7 +4303,7 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
list_for_each_entry(obj, &table->objects, list) {
if (!nla_strcmp(nla, obj->name) &&
- objtype == obj->type->type &&
+ objtype == obj->ops->type->type &&
nft_active_genmask(obj, genmask))
return obj;
}
@@ -4231,6 +4325,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
const struct nlattr *attr)
{
struct nlattr *tb[type->maxattr + 1];
+ const struct nft_object_ops *ops;
struct nft_object *obj;
int err;
@@ -4243,16 +4338,27 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1));
}
+ if (type->select_ops) {
+ ops = type->select_ops(ctx, (const struct nlattr * const *)tb);
+ if (IS_ERR(ops)) {
+ err = PTR_ERR(ops);
+ goto err1;
+ }
+ } else {
+ ops = type->ops;
+ }
+
err = -ENOMEM;
- obj = kzalloc(sizeof(struct nft_object) + type->size, GFP_KERNEL);
+ obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
if (obj == NULL)
goto err1;
- err = type->init(ctx, (const struct nlattr * const *)tb, obj);
+ err = ops->init(ctx, (const struct nlattr * const *)tb, obj);
if (err < 0)
goto err2;
- obj->type = type;
+ obj->ops = ops;
+
return obj;
err2:
kfree(obj);
@@ -4268,7 +4374,7 @@ static int nft_object_dump(struct sk_buff *skb, unsigned int attr,
nest = nla_nest_start(skb, attr);
if (!nest)
goto nla_put_failure;
- if (obj->type->dump(skb, obj, reset) < 0)
+ if (obj->ops->dump(skb, obj, reset) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
return 0;
@@ -4363,18 +4469,24 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
goto err1;
}
obj->table = table;
- nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN);
+ obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+ if (!obj->name) {
+ err = -ENOMEM;
+ goto err2;
+ }
err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj);
if (err < 0)
- goto err2;
+ goto err3;
list_add_tail_rcu(&obj->list, &table->objects);
table->use++;
return 0;
+err3:
+ kfree(obj->name);
err2:
- if (obj->type->destroy)
- obj->type->destroy(obj);
+ if (obj->ops->destroy)
+ obj->ops->destroy(obj);
kfree(obj);
err1:
module_put(type->owner);
@@ -4401,7 +4513,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
- nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->type->type)) ||
+ nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
goto nla_put_failure;
@@ -4415,7 +4527,7 @@ nla_put_failure:
}
struct nft_obj_filter {
- char table[NFT_OBJ_MAXNAMELEN];
+ char *table;
u32 type;
};
@@ -4455,7 +4567,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
goto cont;
if (filter &&
filter->type != NFT_OBJECT_UNSPEC &&
- obj->type->type != filter->type)
+ obj->ops->type->type != filter->type)
goto cont;
if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
@@ -4480,7 +4592,10 @@ done:
static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
- kfree(cb->data);
+ struct nft_obj_filter *filter = cb->data;
+
+ kfree(filter->table);
+ kfree(filter);
return 0;
}
@@ -4494,9 +4609,13 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
if (!filter)
return ERR_PTR(-ENOMEM);
- if (nla[NFTA_OBJ_TABLE])
- nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE],
- NFT_TABLE_MAXNAMELEN);
+ if (nla[NFTA_OBJ_TABLE]) {
+ filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_KERNEL);
+ if (!filter->table) {
+ kfree(filter);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
if (nla[NFTA_OBJ_TYPE])
filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
@@ -4576,10 +4695,11 @@ err:
static void nft_obj_destroy(struct nft_object *obj)
{
- if (obj->type->destroy)
- obj->type->destroy(obj);
+ if (obj->ops->destroy)
+ obj->ops->destroy(obj);
- module_put(obj->type->owner);
+ module_put(obj->ops->type->owner);
+ kfree(obj->name);
kfree(obj);
}
@@ -4662,6 +4782,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
+ char buf[TASK_COMM_LEN];
int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
@@ -4673,7 +4794,9 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
- if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)))
+ if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) ||
+ nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
+ nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -4842,7 +4965,7 @@ static void nft_chain_commit_update(struct nft_trans *trans)
{
struct nft_base_chain *basechain;
- if (nft_trans_chain_name(trans)[0])
+ if (nft_trans_chain_name(trans))
strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
if (!nft_is_base_chain(trans->ctx.chain))
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 65dbeadcb118..dfd0bf3810d2 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -29,7 +29,7 @@ static const char *const comments[__NFT_TRACETYPE_MAX] = {
[NFT_TRACETYPE_RULE] = "rule",
};
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
@@ -114,6 +114,22 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
return true;
}
+DEFINE_STATIC_KEY_FALSE(nft_counters_enabled);
+
+static noinline void nft_update_chain_stats(const struct nft_chain *chain,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_stats *stats;
+
+ local_bh_disable();
+ stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats));
+ u64_stats_update_begin(&stats->syncp);
+ stats->pkts++;
+ stats->bytes += pkt->skb->len;
+ u64_stats_update_end(&stats->syncp);
+ local_bh_enable();
+}
+
struct nft_jumpstack {
const struct nft_chain *chain;
const struct nft_rule *rule;
@@ -130,7 +146,6 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
- struct nft_stats *stats;
int rulenum;
unsigned int gencursor = nft_genmask_cur(net);
struct nft_traceinfo info;
@@ -220,13 +235,8 @@ next_rule:
nft_trace_packet(&info, basechain, NULL, -1,
NFT_TRACETYPE_POLICY);
- rcu_read_lock_bh();
- stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
- u64_stats_update_begin(&stats->syncp);
- stats->pkts++;
- stats->bytes += pkt->skb->len;
- u64_stats_update_end(&stats->syncp);
- rcu_read_unlock_bh();
+ if (static_branch_unlikely(&nft_counters_enabled))
+ nft_update_chain_stats(basechain, pkt);
return nft_base_chain(basechain)->policy;
}
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index e1b15e7a5793..e1dc527a493b 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -162,6 +162,27 @@ static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
NFTA_TRACE_PAD);
}
+static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info)
+{
+ switch (info->type) {
+ case NFT_TRACETYPE_RETURN:
+ case NFT_TRACETYPE_RULE:
+ break;
+ default:
+ return false;
+ }
+
+ switch (info->verdict->code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
void nft_trace_notify(struct nft_traceinfo *info)
{
const struct nft_pktinfo *pkt = info->pkt;
@@ -175,13 +196,12 @@ void nft_trace_notify(struct nft_traceinfo *info)
return;
size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
- nla_total_size(NFT_TABLE_MAXNAMELEN) +
- nla_total_size(NFT_CHAIN_MAXNAMELEN) +
+ nla_total_size(strlen(info->chain->table->name)) +
+ nla_total_size(strlen(info->chain->name)) +
nla_total_size_64bit(sizeof(__be64)) + /* rule handle */
nla_total_size(sizeof(__be32)) + /* trace type */
nla_total_size(0) + /* VERDICT, nested */
nla_total_size(sizeof(u32)) + /* verdict code */
- nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */
nla_total_size(sizeof(u32)) + /* id */
nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
@@ -194,6 +214,9 @@ void nft_trace_notify(struct nft_traceinfo *info)
nla_total_size(sizeof(u32)) + /* nfproto */
nla_total_size(sizeof(u32)); /* policy */
+ if (nft_trace_have_verdict_chain(info))
+ size += nla_total_size(strlen(info->verdict->chain->name)); /* jump target */
+
skb = nlmsg_new(size, GFP_ATOMIC);
if (!skb)
return;
@@ -217,14 +240,11 @@ void nft_trace_notify(struct nft_traceinfo *info)
if (trace_fill_id(skb, pkt->skb))
goto nla_put_failure;
- if (info->chain) {
- if (nla_put_string(skb, NFTA_TRACE_CHAIN,
- info->chain->name))
- goto nla_put_failure;
- if (nla_put_string(skb, NFTA_TRACE_TABLE,
- info->chain->table->name))
- goto nla_put_failure;
- }
+ if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name))
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_TRACE_TABLE, info->chain->table->name))
+ goto nla_put_failure;
if (nf_trace_fill_rule_info(skb, info))
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 400e9ae97153..32b1c0b44e79 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -47,7 +47,8 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
};
static int
-ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
+ctnl_timeout_parse_policy(void *timeouts,
+ const struct nf_conntrack_l4proto *l4proto,
struct net *net, const struct nlattr *attr)
{
int ret = 0;
@@ -74,7 +75,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
{
__u16 l3num;
__u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct ctnl_timeout *timeout, *matching = NULL;
char *name;
int ret;
@@ -158,7 +159,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
- struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
+ const struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -363,10 +364,10 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
const struct nlattr * const cda[],
struct netlink_ext_ack *extack)
{
+ const struct nf_conntrack_l4proto *l4proto;
+ unsigned int *timeouts;
__u16 l3num;
__u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
- unsigned int *timeouts;
int ret;
if (!cda[CTA_TIMEOUT_L3PROTO] ||
@@ -401,7 +402,7 @@ err:
static int
cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
u32 seq, u32 type, int event,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
@@ -453,11 +454,11 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
const struct nlattr * const cda[],
struct netlink_ext_ack *extack)
{
- __u16 l3num;
- __u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct sk_buff *skb2;
int ret, err;
+ __u16 l3num;
+ __u8 l4num;
if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO])
return -EINVAL;
@@ -505,7 +506,6 @@ ctnl_timeout_find_get(struct net *net, const char *name)
{
struct ctnl_timeout *timeout, *matching = NULL;
- rcu_read_lock();
list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) {
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
@@ -521,7 +521,6 @@ ctnl_timeout_find_get(struct net *net, const char *name)
break;
}
err:
- rcu_read_unlock();
return matching;
}
@@ -572,6 +571,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
{
struct ctnl_timeout *cur, *tmp;
+ nf_ct_unconfirmed_destroy(net);
ctnl_untimeout(net, NULL);
list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index c684ba95dbb4..cad6498f10b0 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -606,7 +606,7 @@ nla_put_failure:
return -1;
}
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_ULOG,
.u = {
.ulog = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 16fa04086880..c9796629858f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -41,6 +41,10 @@
#include "../bridge/br_private.h"
#endif
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
#define NFQNL_QMAX_DEFAULT 1024
/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
@@ -612,6 +616,18 @@ nlmsg_failure:
return NULL;
}
+static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
+ const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
+
+ if (ct && ((ct->status & flags) == IPS_DYING))
+ return true;
+#endif
+ return false;
+}
+
static int
__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry)
@@ -628,6 +644,9 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
}
spin_lock_bh(&queue->lock);
+ if (nf_ct_drop_unconfirmed(entry))
+ goto err_out_free_nskb;
+
if (queue->queue_total >= queue->queue_maxlen) {
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
failopen = 1;
@@ -928,7 +947,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net)
unsigned int instances = 0;
int i;
- rcu_read_lock();
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct nfqnl_instance *inst;
struct hlist_head *head = &q->instance_table[i];
@@ -938,7 +956,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net)
instances++;
}
}
- rcu_read_unlock();
return instances;
}
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 67a710ebde09..eefe3b409925 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -175,15 +175,21 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
[NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
};
-static struct nft_object_type nft_counter_obj __read_mostly = {
- .type = NFT_OBJECT_COUNTER,
+static struct nft_object_type nft_counter_obj_type;
+static const struct nft_object_ops nft_counter_obj_ops = {
+ .type = &nft_counter_obj_type,
.size = sizeof(struct nft_counter_percpu_priv),
- .maxattr = NFTA_COUNTER_MAX,
- .policy = nft_counter_policy,
.eval = nft_counter_obj_eval,
.init = nft_counter_obj_init,
.destroy = nft_counter_obj_destroy,
.dump = nft_counter_obj_dump,
+};
+
+static struct nft_object_type nft_counter_obj_type __read_mostly = {
+ .type = NFT_OBJECT_COUNTER,
+ .ops = &nft_counter_obj_ops,
+ .maxattr = NFTA_COUNTER_MAX,
+ .policy = nft_counter_policy,
.owner = THIS_MODULE,
};
@@ -271,7 +277,7 @@ static int __init nft_counter_module_init(void)
for_each_possible_cpu(cpu)
seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
- err = nft_register_obj(&nft_counter_obj);
+ err = nft_register_obj(&nft_counter_obj_type);
if (err < 0)
return err;
@@ -281,14 +287,14 @@ static int __init nft_counter_module_init(void)
return 0;
err1:
- nft_unregister_obj(&nft_counter_obj);
+ nft_unregister_obj(&nft_counter_obj_type);
return err;
}
static void __exit nft_counter_module_exit(void)
{
nft_unregister_expr(&nft_counter_type);
- nft_unregister_obj(&nft_counter_obj);
+ nft_unregister_obj(&nft_counter_obj_type);
}
module_init(nft_counter_module_init);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 1678e9e75e8e..bd0975d7dd6f 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -904,15 +904,21 @@ static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
[NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
};
-static struct nft_object_type nft_ct_helper_obj __read_mostly = {
- .type = NFT_OBJECT_CT_HELPER,
+static struct nft_object_type nft_ct_helper_obj_type;
+static const struct nft_object_ops nft_ct_helper_obj_ops = {
+ .type = &nft_ct_helper_obj_type,
.size = sizeof(struct nft_ct_helper_obj),
- .maxattr = NFTA_CT_HELPER_MAX,
- .policy = nft_ct_helper_policy,
.eval = nft_ct_helper_obj_eval,
.init = nft_ct_helper_obj_init,
.destroy = nft_ct_helper_obj_destroy,
.dump = nft_ct_helper_obj_dump,
+};
+
+static struct nft_object_type nft_ct_helper_obj_type __read_mostly = {
+ .type = NFT_OBJECT_CT_HELPER,
+ .ops = &nft_ct_helper_obj_ops,
+ .maxattr = NFTA_CT_HELPER_MAX,
+ .policy = nft_ct_helper_policy,
.owner = THIS_MODULE,
};
@@ -930,7 +936,7 @@ static int __init nft_ct_module_init(void)
if (err < 0)
goto err1;
- err = nft_register_obj(&nft_ct_helper_obj);
+ err = nft_register_obj(&nft_ct_helper_obj_type);
if (err < 0)
goto err2;
@@ -945,7 +951,7 @@ err1:
static void __exit nft_ct_module_exit(void)
{
- nft_unregister_obj(&nft_ct_helper_obj);
+ nft_unregister_obj(&nft_ct_helper_obj_type);
nft_unregister_expr(&nft_notrack_type);
nft_unregister_expr(&nft_ct_type);
}
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 1ec49fe5845f..a0a93d987a3b 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -8,6 +8,7 @@
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -23,6 +24,7 @@ struct nft_exthdr {
u8 len;
u8 op;
enum nft_registers dreg:8;
+ enum nft_registers sreg:8;
u8 flags;
};
@@ -61,6 +63,26 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void *
+nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
+ unsigned int len, void *buffer, unsigned int *tcphdr_len)
+{
+ struct tcphdr *tcph;
+
+ if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
+ return NULL;
+
+ tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buffer);
+ if (!tcph)
+ return NULL;
+
+ *tcphdr_len = __tcp_hdrlen(tcph);
+ if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len)
+ return NULL;
+
+ return skb_header_pointer(pkt->skb, pkt->xt.thoff, *tcphdr_len, buffer);
+}
+
static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -72,18 +94,7 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
struct tcphdr *tcph;
u8 *opt;
- if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
- goto err;
-
- tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buff);
- if (!tcph)
- goto err;
-
- tcphdr_len = __tcp_hdrlen(tcph);
- if (tcphdr_len < sizeof(*tcph))
- goto err;
-
- tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, tcphdr_len, buff);
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
if (!tcph)
goto err;
@@ -115,6 +126,88 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int i, optl, tcphdr_len, offset;
+ struct tcphdr *tcph;
+ u8 *opt;
+ u32 src;
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
+ if (!tcph)
+ return;
+
+ opt = (u8 *)tcph;
+ for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+ union {
+ u8 octet;
+ __be16 v16;
+ __be32 v32;
+ } old, new;
+
+ optl = optlen(opt, i);
+
+ if (priv->type != opt[i])
+ continue;
+
+ if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
+ return;
+
+ if (!skb_make_writable(pkt->skb, pkt->xt.thoff + i + priv->len))
+ return;
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
+ &tcphdr_len);
+ if (!tcph)
+ return;
+
+ src = regs->data[priv->sreg];
+ offset = i + priv->offset;
+
+ switch (priv->len) {
+ case 2:
+ old.v16 = get_unaligned((u16 *)(opt + offset));
+ new.v16 = src;
+
+ switch (priv->type) {
+ case TCPOPT_MSS:
+ /* increase can cause connection to stall */
+ if (ntohs(old.v16) <= ntohs(new.v16))
+ return;
+ break;
+ }
+
+ if (old.v16 == new.v16)
+ return;
+
+ put_unaligned(new.v16, (u16*)(opt + offset));
+ inet_proto_csum_replace2(&tcph->check, pkt->skb,
+ old.v16, new.v16, false);
+ break;
+ case 4:
+ new.v32 = src;
+ old.v32 = get_unaligned((u32 *)(opt + offset));
+
+ if (old.v32 == new.v32)
+ return;
+
+ put_unaligned(new.v32, (u32*)(opt + offset));
+ inet_proto_csum_replace4(&tcph->check, pkt->skb,
+ old.v32, new.v32, false);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return;
+ }
+}
+
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
[NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
@@ -171,12 +264,57 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, priv->len);
}
-static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- const struct nft_exthdr *priv = nft_expr_priv(expr);
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
+ int err;
- if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
- goto nla_put_failure;
+ if (!tb[NFTA_EXTHDR_SREG] ||
+ !tb[NFTA_EXTHDR_TYPE] ||
+ !tb[NFTA_EXTHDR_OFFSET] ||
+ !tb[NFTA_EXTHDR_LEN])
+ return -EINVAL;
+
+ if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS])
+ return -EINVAL;
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
+ if (err < 0)
+ return err;
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
+ if (err < 0)
+ return err;
+
+ if (offset < 2)
+ return -EOPNOTSUPP;
+
+ switch (len) {
+ case 2: break;
+ case 4: break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
+ if (err < 0)
+ return err;
+
+ priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+ priv->offset = offset;
+ priv->len = len;
+ priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]);
+ priv->flags = flags;
+ priv->op = op;
+
+ return nft_validate_register_load(priv->sreg, priv->len);
+}
+
+static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
+{
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
@@ -193,6 +331,26 @@ nla_put_failure:
return -1;
}
+static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
+ return -1;
+
+ return nft_exthdr_dump_common(skb, priv);
+}
+
+static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_EXTHDR_SREG, priv->sreg))
+ return -1;
+
+ return nft_exthdr_dump_common(skb, priv);
+}
+
static struct nft_expr_type nft_exthdr_type;
static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
.type = &nft_exthdr_type,
@@ -210,6 +368,14 @@ static const struct nft_expr_ops nft_exthdr_tcp_ops = {
.dump = nft_exthdr_dump,
};
+static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_tcp_set_eval,
+ .init = nft_exthdr_tcp_set_init,
+ .dump = nft_exthdr_dump_set,
+};
+
static const struct nft_expr_ops *
nft_exthdr_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -219,12 +385,21 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
if (!tb[NFTA_EXTHDR_OP])
return &nft_exthdr_ipv6_ops;
- op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP]));
+ if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG])
+ return ERR_PTR(-EOPNOTSUPP);
+
+ op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP]));
switch (op) {
case NFT_EXTHDR_OP_TCPOPT:
- return &nft_exthdr_tcp_ops;
+ if (tb[NFTA_EXTHDR_SREG])
+ return &nft_exthdr_tcp_set_ops;
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_tcp_ops;
+ break;
case NFT_EXTHDR_OP_IPV6:
- return &nft_exthdr_ipv6_ops;
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_ipv6_ops;
+ break;
}
return ERR_PTR(-EOPNOTSUPP);
diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c
new file mode 100644
index 000000000000..3997ee36cfbd
--- /dev/null
+++ b/net/netfilter/nft_fib_netdev.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017 Pablo M. Bermudo Garay <pablombg@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This code is based on net/netfilter/nft_fib_inet.c, written by
+ * Florian Westphal <fw@strlen.de>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#include <net/netfilter/nft_fib.h>
+
+static void nft_fib_netdev_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+
+ switch (ntohs(pkt->skb->protocol)) {
+ case ETH_P_IP:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib4_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib4_eval_type(expr, regs, pkt);
+ }
+ break;
+ case ETH_P_IPV6:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib6_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib6_eval_type(expr, regs, pkt);
+ }
+ break;
+ }
+
+ regs->verdict.code = NFT_BREAK;
+}
+
+static struct nft_expr_type nft_fib_netdev_type;
+static const struct nft_expr_ops nft_fib_netdev_ops = {
+ .type = &nft_fib_netdev_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib_netdev_eval,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+};
+
+static struct nft_expr_type nft_fib_netdev_type __read_mostly = {
+ .family = NFPROTO_NETDEV,
+ .name = "fib",
+ .ops = &nft_fib_netdev_ops,
+ .policy = nft_fib_policy,
+ .maxattr = NFTA_FIB_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fib_netdev_module_init(void)
+{
+ return nft_register_expr(&nft_fib_netdev_type);
+}
+
+static void __exit nft_fib_netdev_module_exit(void)
+{
+ nft_unregister_expr(&nft_fib_netdev_type);
+}
+
+module_init(nft_fib_netdev_module_init);
+module_exit(nft_fib_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo M. Bermudo Garay <pablombg@gmail.com>");
+MODULE_ALIAS_NFT_AF_EXPR(5, "fib");
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 14538b1d4d11..a9fc298ef4c3 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -168,9 +168,9 @@ static const struct nft_expr_ops nft_limit_pkts_ops = {
.dump = nft_limit_pkts_dump,
};
-static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static void nft_limit_bytes_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
struct nft_limit *priv = nft_expr_priv(expr);
u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
@@ -179,29 +179,29 @@ static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
}
-static int nft_limit_pkt_bytes_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_limit_bytes_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_limit *priv = nft_expr_priv(expr);
return nft_limit_init(priv, tb);
}
-static int nft_limit_pkt_bytes_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+static int nft_limit_bytes_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
const struct nft_limit *priv = nft_expr_priv(expr);
return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
}
-static const struct nft_expr_ops nft_limit_pkt_bytes_ops = {
+static const struct nft_expr_ops nft_limit_bytes_ops = {
.type = &nft_limit_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_limit)),
- .eval = nft_limit_pkt_bytes_eval,
- .init = nft_limit_pkt_bytes_init,
- .dump = nft_limit_pkt_bytes_dump,
+ .eval = nft_limit_bytes_eval,
+ .init = nft_limit_bytes_init,
+ .dump = nft_limit_bytes_dump,
};
static const struct nft_expr_ops *
@@ -215,7 +215,7 @@ nft_limit_select_ops(const struct nft_ctx *ctx,
case NFT_LIMIT_PKTS:
return &nft_limit_pkts_ops;
case NFT_LIMIT_PKT_BYTES:
- return &nft_limit_pkt_bytes_ops;
+ return &nft_limit_bytes_ops;
}
return ERR_PTR(-EOPNOTSUPP);
}
@@ -229,14 +229,133 @@ static struct nft_expr_type nft_limit_type __read_mostly = {
.owner = THIS_MODULE,
};
+static void nft_limit_obj_pkts_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit_pkts *priv = nft_obj_data(obj);
+
+ if (nft_limit_eval(&priv->limit, priv->cost))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_limit_pkts *priv = nft_obj_data(obj);
+ int err;
+
+ err = nft_limit_init(&priv->limit, tb);
+ if (err < 0)
+ return err;
+
+ priv->cost = div64_u64(priv->limit.nsecs, priv->limit.rate);
+ return 0;
+}
+
+static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
+ struct nft_object *obj,
+ bool reset)
+{
+ const struct nft_limit_pkts *priv = nft_obj_data(obj);
+
+ return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
+}
+
+static struct nft_object_type nft_limit_obj_type;
+static const struct nft_object_ops nft_limit_obj_pkts_ops = {
+ .type = &nft_limit_obj_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)),
+ .init = nft_limit_obj_pkts_init,
+ .eval = nft_limit_obj_pkts_eval,
+ .dump = nft_limit_obj_pkts_dump,
+};
+
+static void nft_limit_obj_bytes_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit *priv = nft_obj_data(obj);
+ u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
+
+ if (nft_limit_eval(priv, cost))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_limit *priv = nft_obj_data(obj);
+
+ return nft_limit_init(priv, tb);
+}
+
+static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
+ struct nft_object *obj,
+ bool reset)
+{
+ const struct nft_limit *priv = nft_obj_data(obj);
+
+ return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
+}
+
+static struct nft_object_type nft_limit_obj_type;
+static const struct nft_object_ops nft_limit_obj_bytes_ops = {
+ .type = &nft_limit_obj_type,
+ .size = sizeof(struct nft_limit),
+ .init = nft_limit_obj_bytes_init,
+ .eval = nft_limit_obj_bytes_eval,
+ .dump = nft_limit_obj_bytes_dump,
+};
+
+static const struct nft_object_ops *
+nft_limit_obj_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (!tb[NFTA_LIMIT_TYPE])
+ return &nft_limit_obj_pkts_ops;
+
+ switch (ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]))) {
+ case NFT_LIMIT_PKTS:
+ return &nft_limit_obj_pkts_ops;
+ case NFT_LIMIT_PKT_BYTES:
+ return &nft_limit_obj_bytes_ops;
+ }
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static struct nft_object_type nft_limit_obj_type __read_mostly = {
+ .select_ops = nft_limit_obj_select_ops,
+ .type = NFT_OBJECT_LIMIT,
+ .maxattr = NFTA_LIMIT_MAX,
+ .policy = nft_limit_policy,
+ .owner = THIS_MODULE,
+};
+
static int __init nft_limit_module_init(void)
{
- return nft_register_expr(&nft_limit_type);
+ int err;
+
+ err = nft_register_obj(&nft_limit_obj_type);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_limit_type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err1:
+ nft_unregister_obj(&nft_limit_obj_type);
+ return err;
}
static void __exit nft_limit_module_exit(void)
{
nft_unregister_expr(&nft_limit_type);
+ nft_unregister_obj(&nft_limit_obj_type);
}
module_init(nft_limit_module_init);
@@ -245,3 +364,4 @@ module_exit(nft_limit_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("limit");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_LIMIT);
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 1dd428fbaaa3..7bcdc48f3d73 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -22,7 +22,7 @@ static void nft_objref_eval(const struct nft_expr *expr,
{
struct nft_object *obj = nft_objref_priv(expr);
- obj->type->eval(obj, regs, pkt);
+ obj->ops->eval(obj, regs, pkt);
}
static int nft_objref_init(const struct nft_ctx *ctx,
@@ -54,7 +54,8 @@ static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr)
const struct nft_object *obj = nft_objref_priv(expr);
if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) ||
- nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->type->type)))
+ nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE,
+ htonl(obj->ops->type->type)))
goto nla_put_failure;
return 0;
@@ -104,7 +105,7 @@ static void nft_objref_map_eval(const struct nft_expr *expr,
return;
}
obj = *nft_set_ext_obj(ext);
- obj->type->eval(obj, regs, pkt);
+ obj->ops->eval(obj, regs, pkt);
}
static int nft_objref_map_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 7d699bbd45b0..e110b0ebbf58 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -184,7 +184,7 @@ static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff)
if (!uh)
return false;
- return uh->check;
+ return (__force bool)uh->check;
}
static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 25e33159be57..0ed124a93fcf 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -151,14 +151,20 @@ static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj,
return nft_quota_do_dump(skb, priv, reset);
}
-static struct nft_object_type nft_quota_obj __read_mostly = {
- .type = NFT_OBJECT_QUOTA,
+static struct nft_object_type nft_quota_obj_type;
+static const struct nft_object_ops nft_quota_obj_ops = {
+ .type = &nft_quota_obj_type,
.size = sizeof(struct nft_quota),
- .maxattr = NFTA_QUOTA_MAX,
- .policy = nft_quota_policy,
.init = nft_quota_obj_init,
.eval = nft_quota_obj_eval,
.dump = nft_quota_obj_dump,
+};
+
+static struct nft_object_type nft_quota_obj_type __read_mostly = {
+ .type = NFT_OBJECT_QUOTA,
+ .ops = &nft_quota_obj_ops,
+ .maxattr = NFTA_QUOTA_MAX,
+ .policy = nft_quota_policy,
.owner = THIS_MODULE,
};
@@ -209,7 +215,7 @@ static int __init nft_quota_module_init(void)
{
int err;
- err = nft_register_obj(&nft_quota_obj);
+ err = nft_register_obj(&nft_quota_obj_type);
if (err < 0)
return err;
@@ -219,14 +225,14 @@ static int __init nft_quota_module_init(void)
return 0;
err1:
- nft_unregister_obj(&nft_quota_obj);
+ nft_unregister_obj(&nft_quota_obj_type);
return err;
}
static void __exit nft_quota_module_exit(void)
{
nft_unregister_expr(&nft_quota_type);
- nft_unregister_obj(&nft_quota_obj);
+ nft_unregister_obj(&nft_quota_obj_type);
}
module_init(nft_quota_module_init);
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index c7383d8f88d0..a6b7d05aeacf 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -23,6 +23,43 @@ struct nft_rt {
enum nft_registers dreg:8;
};
+static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst)
+{
+ u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst);
+ const struct sk_buff *skb = pkt->skb;
+ const struct nf_afinfo *ai;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ip_hdr(skb)->saddr;
+ minlen = sizeof(struct iphdr) + sizeof(struct tcphdr);
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.daddr = ipv6_hdr(skb)->saddr;
+ minlen = sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+ break;
+ }
+
+ ai = nf_get_afinfo(nft_pf(pkt));
+ if (ai) {
+ struct dst_entry *dst = NULL;
+
+ ai->route(nft_net(pkt), &dst, &fl, false);
+ if (dst) {
+ mtu = min(mtu, dst_mtu(dst));
+ dst_release(dst);
+ }
+ }
+
+ if (mtu <= minlen || mtu > 0xffff)
+ return TCP_MSS_DEFAULT;
+
+ return mtu - minlen;
+}
+
static void nft_rt_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -46,8 +83,8 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
if (nft_pf(pkt) != NFPROTO_IPV4)
goto err;
- *dest = rt_nexthop((const struct rtable *)dst,
- ip_hdr(skb)->daddr);
+ *dest = (__force u32)rt_nexthop((const struct rtable *)dst,
+ ip_hdr(skb)->daddr);
break;
case NFT_RT_NEXTHOP6:
if (nft_pf(pkt) != NFPROTO_IPV6)
@@ -57,6 +94,9 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
&ipv6_hdr(skb)->daddr),
sizeof(struct in6_addr));
break;
+ case NFT_RT_TCPMSS:
+ nft_reg_store16(dest, get_tcpmss(pkt, dst));
+ break;
default:
WARN_ON(1);
goto err;
@@ -67,7 +107,7 @@ err:
regs->verdict.code = NFT_BREAK;
}
-const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
+static const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
[NFTA_RT_DREG] = { .type = NLA_U32 },
[NFTA_RT_KEY] = { .type = NLA_U32 },
};
@@ -94,6 +134,9 @@ static int nft_rt_get_init(const struct nft_ctx *ctx,
case NFT_RT_NEXTHOP6:
len = sizeof(struct in6_addr);
break;
+ case NFT_RT_TCPMSS:
+ len = sizeof(u16);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -118,6 +161,29 @@ nla_put_failure:
return -1;
}
+static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ const struct nft_rt *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ switch (priv->key) {
+ case NFT_RT_NEXTHOP4:
+ case NFT_RT_NEXTHOP6:
+ case NFT_RT_CLASSID:
+ return 0;
+ case NFT_RT_TCPMSS:
+ hooks = (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
static struct nft_expr_type nft_rt_type;
static const struct nft_expr_ops nft_rt_get_ops = {
.type = &nft_rt_type,
@@ -125,6 +191,7 @@ static const struct nft_expr_ops nft_rt_get_ops = {
.eval = nft_rt_get_eval,
.init = nft_rt_get_init,
.dump = nft_rt_get_dump,
+ .validate = nft_rt_validate,
};
static struct nft_expr_type nft_rt_type __read_mostly = {
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index bce5382f1d49..d83a4ec5900d 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -19,8 +19,9 @@
#include <net/netfilter/nf_tables.h>
struct nft_rbtree {
- rwlock_t lock;
struct rb_root root;
+ rwlock_t lock;
+ seqcount_t count;
};
struct nft_rbtree_elem {
@@ -40,8 +41,9 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
}
-static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext,
+ unsigned int seq)
{
struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
@@ -50,15 +52,17 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const void *this;
int d;
- read_lock_bh(&priv->lock);
- parent = priv->root.rb_node;
+ parent = rcu_dereference_raw(priv->root.rb_node);
while (parent != NULL) {
+ if (read_seqcount_retry(&priv->count, seq))
+ return false;
+
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
this = nft_set_ext_key(&rbe->ext);
d = memcmp(this, key, set->klen);
if (d < 0) {
- parent = parent->rb_left;
+ parent = rcu_dereference_raw(parent->rb_left);
if (interval &&
nft_rbtree_equal(set, this, interval) &&
nft_rbtree_interval_end(this) &&
@@ -66,15 +70,14 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
continue;
interval = rbe;
} else if (d > 0)
- parent = parent->rb_right;
+ parent = rcu_dereference_raw(parent->rb_right);
else {
if (!nft_set_elem_active(&rbe->ext, genmask)) {
- parent = parent->rb_left;
+ parent = rcu_dereference_raw(parent->rb_left);
continue;
}
if (nft_rbtree_interval_end(rbe))
goto out;
- read_unlock_bh(&priv->lock);
*ext = &rbe->ext;
return true;
@@ -84,15 +87,32 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
!nft_rbtree_interval_end(interval)) {
- read_unlock_bh(&priv->lock);
*ext = &interval->ext;
return true;
}
out:
- read_unlock_bh(&priv->lock);
return false;
}
+static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ unsigned int seq = read_seqcount_begin(&priv->count);
+ bool ret;
+
+ ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+ if (ret || !read_seqcount_retry(&priv->count, seq))
+ return ret;
+
+ read_lock_bh(&priv->lock);
+ seq = read_seqcount_begin(&priv->count);
+ ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+ read_unlock_bh(&priv->lock);
+
+ return ret;
+}
+
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_set_ext **ext)
@@ -130,7 +150,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
}
}
}
- rb_link_node(&new->node, parent, p);
+ rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
}
@@ -144,7 +164,9 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
int err;
write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
err = __nft_rbtree_insert(net, set, rbe, ext);
+ write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
return err;
@@ -158,7 +180,9 @@ static void nft_rbtree_remove(const struct net *net,
struct nft_rbtree_elem *rbe = elem->priv;
write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
rb_erase(&rbe->node, &priv->root);
+ write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
}
@@ -264,6 +288,7 @@ static int nft_rbtree_init(const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
rwlock_init(&priv->lock);
+ seqcount_init(&priv->count);
priv->root = RB_ROOT;
return 0;
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e1648238a9c9..c83a3b5e1c6c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1192,16 +1192,10 @@ xt_replace_table(struct xt_table *table,
#ifdef CONFIG_AUDIT
if (audit_enabled) {
- struct audit_buffer *ab;
-
- ab = audit_log_start(current->audit_context, GFP_KERNEL,
- AUDIT_NETFILTER_CFG);
- if (ab) {
- audit_log_format(ab, "table=%s family=%u entries=%u",
- table->name, table->af,
- private->number);
- audit_log_end(ab);
- }
+ audit_log(current->audit_context, GFP_KERNEL,
+ AUDIT_NETFILTER_CFG,
+ "table=%s family=%u entries=%u",
+ table->name, table->af, private->number);
}
#endif
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 623ef37de886..5a152e2acfd5 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -121,9 +121,9 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
{
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
+ const struct nf_conntrack_l4proto *l4proto;
struct ctnl_timeout *timeout;
struct nf_conn_timeout *timeout_ext;
- struct nf_conntrack_l4proto *l4proto;
int ret = 0;
u8 proto;
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index e45a01255e70..58aa9dd3c5b7 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -77,10 +77,10 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
struct nf_nat_range newrange;
- NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING ||
- xt_hooknum(par) == NF_INET_POST_ROUTING ||
- xt_hooknum(par) == NF_INET_LOCAL_OUT ||
- xt_hooknum(par) == NF_INET_LOCAL_IN);
+ WARN_ON(xt_hooknum(par) != NF_INET_PRE_ROUTING &&
+ xt_hooknum(par) != NF_INET_POST_ROUTING &&
+ xt_hooknum(par) != NF_INET_LOCAL_OUT &&
+ xt_hooknum(par) != NF_INET_LOCAL_IN);
ct = nf_ct_get(skb, &ctinfo);
netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index c64aca611ac5..9dae4d665965 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -62,11 +62,9 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net,
memset(fl6, 0, sizeof(*fl6));
fl6->daddr = ipv6_hdr(skb)->saddr;
}
- rcu_read_lock();
ai = nf_get_afinfo(family);
if (ai != NULL)
ai->route(net, (struct dst_entry **)&rt, &fl, false);
- rcu_read_unlock();
if (rt != NULL) {
mtu = dst_mtu(&rt->dst);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d767e35fff6b..17d7705e3bd4 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -70,13 +70,11 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
return user_laddr;
laddr = 0;
- rcu_read_lock();
indev = __in_dev_get_rcu(skb->dev);
for_primary_ifa(indev) {
laddr = ifa->ifa_local;
break;
} endfor_ifa(indev);
- rcu_read_unlock();
return laddr ? laddr : daddr;
}
@@ -125,7 +123,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
__tcp_hdrlen(tcph),
saddr, sport,
daddr, dport,
- in->ifindex);
+ in->ifindex, 0);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -195,7 +193,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
thoff + __tcp_hdrlen(tcph),
saddr, sport,
daddr, ntohs(dport),
- in->ifindex);
+ in->ifindex, 0);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -208,7 +206,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
case NFT_LOOKUP_ESTABLISHED:
sk = __inet6_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, ntohs(dport),
- in->ifindex);
+ in->ifindex, 0);
break;
default:
BUG();
@@ -391,7 +389,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
return user_laddr;
laddr = NULL;
- rcu_read_lock();
indev = __in6_dev_get(skb->dev);
if (indev) {
read_lock_bh(&indev->lock);
@@ -404,7 +401,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
}
read_unlock_bh(&indev->lock);
}
- rcu_read_unlock();
return laddr ? laddr : daddr;
}
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index e329dabde35f..3b2be2ae6987 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -47,8 +47,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
if (dev)
flow.flowi6_oif = dev->ifindex;
- rcu_read_lock();
-
afinfo = nf_get_afinfo(NFPROTO_IPV6);
if (afinfo != NULL) {
const struct nf_ipv6_ops *v6ops;
@@ -63,7 +61,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
} else {
route_err = 1;
}
- rcu_read_unlock();
if (route_err)
return XT_ADDRTYPE_UNREACHABLE;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index b8fd4ab762ed..ffa8eec980e9 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -58,8 +58,7 @@ struct xt_connlimit_rb {
static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp;
struct xt_connlimit_data {
- struct rb_root climit_root4[CONNLIMIT_SLOTS];
- struct rb_root climit_root6[CONNLIMIT_SLOTS];
+ struct rb_root climit_root[CONNLIMIT_SLOTS];
};
static u_int32_t connlimit_rnd __read_mostly;
@@ -144,7 +143,6 @@ static unsigned int check_hlist(struct net *net,
unsigned int length = 0;
*addit = true;
- rcu_read_lock();
/* check the saved connections */
hlist_for_each_entry_safe(conn, n, head, node) {
@@ -179,8 +177,6 @@ static unsigned int check_hlist(struct net *net,
length++;
}
- rcu_read_unlock();
-
return length;
}
@@ -297,13 +293,11 @@ static int count_them(struct net *net,
int count;
u32 hash;
- if (family == NFPROTO_IPV6) {
+ if (family == NFPROTO_IPV6)
hash = connlimit_iphash6(addr, mask);
- root = &data->climit_root6[hash];
- } else {
+ else
hash = connlimit_iphash(addr->ip & mask->ip);
- root = &data->climit_root4[hash];
- }
+ root = &data->climit_root[hash];
spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
@@ -382,10 +376,8 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
return -ENOMEM;
}
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
- info->data->climit_root4[i] = RB_ROOT;
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
- info->data->climit_root6[i] = RB_ROOT;
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
+ info->data->climit_root[i] = RB_ROOT;
return 0;
}
@@ -416,10 +408,8 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
nf_ct_netns_put(par->net, par->family);
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
- destroy_tree(&info->data->climit_root4[i]);
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
- destroy_tree(&info->data->climit_root6[i]);
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
+ destroy_tree(&info->data->climit_root[i]);
kfree(info->data);
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 762e1874f28b..10d48234f5f4 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -56,6 +56,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
}
/* need to declare this at the top */
+static const struct file_operations dl_file_ops_v2;
static const struct file_operations dl_file_ops_v1;
static const struct file_operations dl_file_ops;
@@ -87,8 +88,19 @@ struct dsthash_ent {
unsigned long expires; /* precalculated expiry time */
struct {
unsigned long prev; /* last modification */
- u_int64_t credit;
- u_int64_t credit_cap, cost;
+ union {
+ struct {
+ u_int64_t credit;
+ u_int64_t credit_cap;
+ u_int64_t cost;
+ };
+ struct {
+ u_int32_t interval, prev_window;
+ u_int64_t current_rate;
+ u_int64_t rate;
+ int64_t burst;
+ };
+ };
} rateinfo;
struct rcu_head rcu;
};
@@ -99,7 +111,7 @@ struct xt_hashlimit_htable {
u_int8_t family;
bool rnd_initialized;
- struct hashlimit_cfg2 cfg; /* config */
+ struct hashlimit_cfg3 cfg; /* config */
/* used internally */
spinlock_t lock; /* lock for list_head */
@@ -116,10 +128,10 @@ struct xt_hashlimit_htable {
};
static int
-cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
+cfg_copy(struct hashlimit_cfg3 *to, const void *from, int revision)
{
if (revision == 1) {
- struct hashlimit_cfg1 *cfg = from;
+ struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from;
to->mode = cfg->mode;
to->avg = cfg->avg;
@@ -131,7 +143,19 @@ cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
to->srcmask = cfg->srcmask;
to->dstmask = cfg->dstmask;
} else if (revision == 2) {
- memcpy(to, from, sizeof(struct hashlimit_cfg2));
+ struct hashlimit_cfg2 *cfg = (struct hashlimit_cfg2 *)from;
+
+ to->mode = cfg->mode;
+ to->avg = cfg->avg;
+ to->burst = cfg->burst;
+ to->size = cfg->size;
+ to->max = cfg->max;
+ to->gc_interval = cfg->gc_interval;
+ to->expire = cfg->expire;
+ to->srcmask = cfg->srcmask;
+ to->dstmask = cfg->dstmask;
+ } else if (revision == 3) {
+ memcpy(to, from, sizeof(struct hashlimit_cfg3));
} else {
return -EINVAL;
}
@@ -240,13 +264,14 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
}
static void htable_gc(struct work_struct *work);
-static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
+static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
const char *name, u_int8_t family,
struct xt_hashlimit_htable **out_hinfo,
int revision)
{
struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
struct xt_hashlimit_htable *hinfo;
+ const struct file_operations *fops;
unsigned int size, i;
int ret;
@@ -268,7 +293,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
*out_hinfo = hinfo;
/* copy match config into hashtable config */
- ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2);
+ ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3);
if (ret)
return ret;
@@ -293,11 +318,21 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
}
spin_lock_init(&hinfo->lock);
+ switch (revision) {
+ case 1:
+ fops = &dl_file_ops_v1;
+ break;
+ case 2:
+ fops = &dl_file_ops_v2;
+ break;
+ default:
+ fops = &dl_file_ops;
+ }
+
hinfo->pde = proc_create_data(name, 0,
(family == NFPROTO_IPV4) ?
hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
- (revision == 1) ? &dl_file_ops_v1 : &dl_file_ops,
- hinfo);
+ fops, hinfo);
if (hinfo->pde == NULL) {
kfree(hinfo->name);
vfree(hinfo);
@@ -482,6 +517,25 @@ static u32 user2credits_byte(u32 user)
return (u32) (us >> 32);
}
+static u64 user2rate(u64 user)
+{
+ if (user != 0) {
+ return div64_u64(XT_HASHLIMIT_SCALE_v2, user);
+ } else {
+ pr_warn("invalid rate from userspace: %llu\n", user);
+ return 0;
+ }
+}
+
+static u64 user2rate_bytes(u64 user)
+{
+ u64 r;
+
+ r = user ? 0xFFFFFFFFULL / user : 0xFFFFFFFFULL;
+ r = (r - 1) << 4;
+ return r;
+}
+
static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
u32 mode, int revision)
{
@@ -491,6 +545,21 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
if (delta == 0)
return;
+ if (revision >= 3 && mode & XT_HASHLIMIT_RATE_MATCH) {
+ u64 interval = dh->rateinfo.interval * HZ;
+
+ if (delta < interval)
+ return;
+
+ dh->rateinfo.prev = now;
+ dh->rateinfo.prev_window =
+ ((dh->rateinfo.current_rate * interval) >
+ (delta * dh->rateinfo.rate));
+ dh->rateinfo.current_rate = 0;
+
+ return;
+ }
+
dh->rateinfo.prev = now;
if (mode & XT_HASHLIMIT_BYTES) {
@@ -515,7 +584,23 @@ static void rateinfo_init(struct dsthash_ent *dh,
struct xt_hashlimit_htable *hinfo, int revision)
{
dh->rateinfo.prev = jiffies;
- if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+ if (revision >= 3 && hinfo->cfg.mode & XT_HASHLIMIT_RATE_MATCH) {
+ dh->rateinfo.prev_window = 0;
+ dh->rateinfo.current_rate = 0;
+ if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+ dh->rateinfo.rate = user2rate_bytes(hinfo->cfg.avg);
+ if (hinfo->cfg.burst)
+ dh->rateinfo.burst =
+ hinfo->cfg.burst * dh->rateinfo.rate;
+ else
+ dh->rateinfo.burst = dh->rateinfo.rate;
+ } else {
+ dh->rateinfo.rate = user2rate(hinfo->cfg.avg);
+ dh->rateinfo.burst =
+ hinfo->cfg.burst + dh->rateinfo.rate;
+ }
+ dh->rateinfo.interval = hinfo->cfg.interval;
+ } else if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ;
dh->rateinfo.cost = user2credits_byte(hinfo->cfg.avg);
dh->rateinfo.credit_cap = hinfo->cfg.burst;
@@ -648,7 +733,7 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
static bool
hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
struct xt_hashlimit_htable *hinfo,
- const struct hashlimit_cfg2 *cfg, int revision)
+ const struct hashlimit_cfg3 *cfg, int revision)
{
unsigned long now = jiffies;
struct dsthash_ent *dh;
@@ -659,12 +744,12 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
goto hotdrop;
- rcu_read_lock_bh();
+ local_bh_disable();
dh = dsthash_find(hinfo, &dst);
if (dh == NULL) {
dh = dsthash_alloc_init(hinfo, &dst, &race);
if (dh == NULL) {
- rcu_read_unlock_bh();
+ local_bh_enable();
goto hotdrop;
} else if (race) {
/* Already got an entry, update expiration timeout */
@@ -680,6 +765,20 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
}
+ if (cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
+ cost = (cfg->mode & XT_HASHLIMIT_BYTES) ? skb->len : 1;
+ dh->rateinfo.current_rate += cost;
+
+ if (!dh->rateinfo.prev_window &&
+ (dh->rateinfo.current_rate <= dh->rateinfo.burst)) {
+ spin_unlock(&dh->lock);
+ rcu_read_unlock_bh();
+ return !(cfg->mode & XT_HASHLIMIT_INVERT);
+ } else {
+ goto overlimit;
+ }
+ }
+
if (cfg->mode & XT_HASHLIMIT_BYTES)
cost = hashlimit_byte_cost(skb->len, dh);
else
@@ -689,12 +788,13 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
/* below the limit */
dh->rateinfo.credit -= cost;
spin_unlock(&dh->lock);
- rcu_read_unlock_bh();
+ local_bh_enable();
return !(cfg->mode & XT_HASHLIMIT_INVERT);
}
+overlimit:
spin_unlock(&dh->lock);
- rcu_read_unlock_bh();
+ local_bh_enable();
/* default match is underlimit - so over the limit, we need to invert */
return cfg->mode & XT_HASHLIMIT_INVERT;
@@ -708,7 +808,7 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
struct xt_hashlimit_htable *hinfo = info->hinfo;
- struct hashlimit_cfg2 cfg = {};
+ struct hashlimit_cfg3 cfg = {};
int ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
@@ -720,17 +820,33 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
}
static bool
-hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
struct xt_hashlimit_htable *hinfo = info->hinfo;
+ struct hashlimit_cfg3 cfg = {};
+ int ret;
+
+ ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
+
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_common(skb, par, hinfo, &cfg, 2);
+}
+
+static bool
+hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
+ struct xt_hashlimit_htable *hinfo = info->hinfo;
- return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2);
+ return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3);
}
static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
struct xt_hashlimit_htable **hinfo,
- struct hashlimit_cfg2 *cfg,
+ struct hashlimit_cfg3 *cfg,
const char *name, int revision)
{
struct net *net = par->net;
@@ -753,7 +869,17 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
}
/* Check for overflow. */
- if (cfg->mode & XT_HASHLIMIT_BYTES) {
+ if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
+ if (cfg->avg == 0) {
+ pr_info("hashlimit invalid rate\n");
+ return -ERANGE;
+ }
+
+ if (cfg->interval == 0) {
+ pr_info("hashlimit invalid interval\n");
+ return -EINVAL;
+ }
+ } else if (cfg->mode & XT_HASHLIMIT_BYTES) {
if (user2credits_byte(cfg->avg) == 0) {
pr_info("overflow, rate too high: %llu\n", cfg->avg);
return -EINVAL;
@@ -784,7 +910,7 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
{
struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
- struct hashlimit_cfg2 cfg = {};
+ struct hashlimit_cfg3 cfg = {};
int ret;
if (info->name[sizeof(info->name) - 1] != '\0')
@@ -799,15 +925,40 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
&cfg, info->name, 1);
}
-static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
{
struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+ struct hashlimit_cfg3 cfg = {};
+ int ret;
+
+ if (info->name[sizeof(info->name) - 1] != '\0')
+ return -EINVAL;
+
+ ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
+
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_check_common(par, &info->hinfo,
+ &cfg, info->name, 2);
+}
+
+static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
if (info->name[sizeof(info->name) - 1] != '\0')
return -EINVAL;
return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
- info->name, 2);
+ info->name, 3);
+}
+
+static void hashlimit_mt_destroy_v2(const struct xt_mtdtor_param *par)
+{
+ const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+
+ htable_put(info->hinfo);
}
static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
@@ -819,7 +970,7 @@ static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
{
- const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+ const struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
htable_put(info->hinfo);
}
@@ -840,9 +991,20 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
.name = "hashlimit",
.revision = 2,
.family = NFPROTO_IPV4,
- .match = hashlimit_mt,
+ .match = hashlimit_mt_v2,
.matchsize = sizeof(struct xt_hashlimit_mtinfo2),
.usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+ .checkentry = hashlimit_mt_check_v2,
+ .destroy = hashlimit_mt_destroy_v2,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hashlimit",
+ .revision = 3,
+ .family = NFPROTO_IPV4,
+ .match = hashlimit_mt,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo3),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo3, hinfo),
.checkentry = hashlimit_mt_check,
.destroy = hashlimit_mt_destroy,
.me = THIS_MODULE,
@@ -863,9 +1025,20 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
.name = "hashlimit",
.revision = 2,
.family = NFPROTO_IPV6,
- .match = hashlimit_mt,
+ .match = hashlimit_mt_v2,
.matchsize = sizeof(struct xt_hashlimit_mtinfo2),
.usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+ .checkentry = hashlimit_mt_check_v2,
+ .destroy = hashlimit_mt_destroy_v2,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hashlimit",
+ .revision = 3,
+ .family = NFPROTO_IPV6,
+ .match = hashlimit_mt,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo3),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo3, hinfo),
.checkentry = hashlimit_mt_check,
.destroy = hashlimit_mt_destroy,
.me = THIS_MODULE,
@@ -947,6 +1120,21 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
}
}
+static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
+ struct seq_file *s)
+{
+ const struct xt_hashlimit_htable *ht = s->private;
+
+ spin_lock(&ent->lock);
+ /* recalculate to show accurate numbers */
+ rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+
+ dl_seq_print(ent, family, s);
+
+ spin_unlock(&ent->lock);
+ return seq_has_overflowed(s);
+}
+
static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
@@ -969,7 +1157,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
- rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+ rateinfo_recalc(ent, jiffies, ht->cfg.mode, 3);
dl_seq_print(ent, family, s);
@@ -977,6 +1165,20 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
return seq_has_overflowed(s);
}
+static int dl_seq_show_v2(struct seq_file *s, void *v)
+{
+ struct xt_hashlimit_htable *htable = s->private;
+ unsigned int *bucket = (unsigned int *)v;
+ struct dsthash_ent *ent;
+
+ if (!hlist_empty(&htable->hash[*bucket])) {
+ hlist_for_each_entry(ent, &htable->hash[*bucket], node)
+ if (dl_seq_real_show_v2(ent, htable->family, s))
+ return -1;
+ }
+ return 0;
+}
+
static int dl_seq_show_v1(struct seq_file *s, void *v)
{
struct xt_hashlimit_htable *htable = s->private;
@@ -1012,6 +1214,13 @@ static const struct seq_operations dl_seq_ops_v1 = {
.show = dl_seq_show_v1
};
+static const struct seq_operations dl_seq_ops_v2 = {
+ .start = dl_seq_start,
+ .next = dl_seq_next,
+ .stop = dl_seq_stop,
+ .show = dl_seq_show_v2
+};
+
static const struct seq_operations dl_seq_ops = {
.start = dl_seq_start,
.next = dl_seq_next,
@@ -1019,6 +1228,18 @@ static const struct seq_operations dl_seq_ops = {
.show = dl_seq_show
};
+static int dl_proc_open_v2(struct inode *inode, struct file *file)
+{
+ int ret = seq_open(file, &dl_seq_ops_v2);
+
+ if (!ret) {
+ struct seq_file *sf = file->private_data;
+
+ sf->private = PDE_DATA(inode);
+ }
+ return ret;
+}
+
static int dl_proc_open_v1(struct inode *inode, struct file *file)
{
int ret = seq_open(file, &dl_seq_ops_v1);
@@ -1042,6 +1263,14 @@ static int dl_proc_open(struct inode *inode, struct file *file)
return ret;
}
+static const struct file_operations dl_file_ops_v2 = {
+ .owner = THIS_MODULE,
+ .open = dl_proc_open_v2,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
static const struct file_operations dl_file_ops_v1 = {
.owner = THIS_MODULE,
.open = dl_proc_open_v1,
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index 8107b3eb865f..0fd14d1eb09d 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -58,9 +58,9 @@ xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
xt_nat_convert_range(&range, &mr->range[0]);
return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
@@ -75,8 +75,8 @@ xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
xt_nat_convert_range(&range, &mr->range[0]);
return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
@@ -90,9 +90,9 @@ xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
}
@@ -105,8 +105,8 @@ xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
return nf_nat_setup_info(ct, range, NF_NAT_MANIP_DST);
}
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 71cfa9551d08..36e14b1f061d 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -226,7 +226,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
sizeof(struct tcphdr), optsize, opts);
}
- rcu_read_lock();
list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
int foptsize, optnum;
@@ -340,7 +339,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
info->loglevel == XT_OSF_LOGLEVEL_FIRST)
break;
}
- rcu_read_unlock();
if (!fcount && (info->flags & XT_OSF_LOG))
nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p),
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 3f6c4fa78bdb..245fa350a7a8 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -106,7 +106,7 @@ static DEFINE_SPINLOCK(recent_lock);
static DEFINE_MUTEX(recent_mutex);
#ifdef CONFIG_PROC_FS
-static const struct file_operations recent_old_fops, recent_mt_fops;
+static const struct file_operations recent_mt_fops;
#endif
static u_int32_t hash_rnd __read_mostly;