From 470acf55a021713869b9bcc967268ac90c8a0fac Mon Sep 17 00:00:00 2001
From: Gao Feng <fgao@ikuai8.com>
Date: Fri, 14 Apr 2017 10:00:08 +0800
Subject: netfilter: xt_CT: fix refcnt leak on error path

There are two cases which causes refcnt leak.

1. When nf_ct_timeout_ext_add failed in xt_ct_set_timeout, it should
free the timeout refcnt.
Now goto the err_put_timeout error handler instead of going ahead.

2. When the time policy is not found, we should call module_put.
Otherwise, the related cthelper module cannot be removed anymore.
It is easy to reproduce by typing the following command:
  # iptables -t raw -A OUTPUT -p tcp -j CT --helper ftp --timeout xxx

Signed-off-by: Gao Feng <fgao@ikuai8.com>
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_CT.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index b008db0184b8..81fdcdca7457 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -167,8 +167,10 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
 		goto err_put_timeout;
 	}
 	timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
-	if (timeout_ext == NULL)
+	if (!timeout_ext) {
 		ret = -ENOMEM;
+		goto err_put_timeout;
+	}
 
 	rcu_read_unlock();
 	return ret;
@@ -200,6 +202,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 			  struct xt_ct_target_info_v1 *info)
 {
 	struct nf_conntrack_zone zone;
+	struct nf_conn_help *help;
 	struct nf_conn *ct;
 	int ret = -EOPNOTSUPP;
 
@@ -248,7 +251,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 	if (info->timeout[0]) {
 		ret = xt_ct_set_timeout(ct, par, info->timeout);
 		if (ret < 0)
-			goto err3;
+			goto err4;
 	}
 	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
 	nf_conntrack_get(&ct->ct_general);
@@ -256,6 +259,10 @@ out:
 	info->ct = ct;
 	return 0;
 
+err4:
+	help = nfct_help(ct);
+	if (help)
+		module_put(help->helper->me);
 err3:
 	nf_ct_tmpl_free(ct);
 err2:
-- 
cgit 


From 66e5a6b18bd09d0431e97cd3c162e76c5c2aebba Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sat, 15 Apr 2017 19:27:42 +0800
Subject: netfilter: nf_ct_helper: permit cthelpers with different names via
 nfnetlink

cthelpers added via nfnetlink may have the same tuple, i.e. except for
the l3proto and l4proto, other fields are all zero. So even with the
different names, we will also fail to add them:
  # nfct helper add ssdp inet udp
  # nfct helper add tftp inet udp
  nfct v1.4.3: netlink error: File exists

So in order to avoid unpredictable behaviour, we should:
1. cthelpers can be selected by nft ct helper obj or xt_CT target, so
report error if duplicated { name, l3proto, l4proto } tuple exist.
2. cthelpers can be selected by nf_ct_tuple_src_mask_cmp when
nf_ct_auto_assign_helper is enabled, so also report error if duplicated
{ l3proto, l4proto, src-port } tuple exist.

Also note, if the cthelper is added from userspace, then the src-port will
always be zero, it's invalid for nf_ct_auto_assign_helper, so there's no
need to check the second point listed above.

Fixes: 893e093c786c ("netfilter: nf_ct_helper: bail out on duplicated helpers")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_helper.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 4eeb3418366a..99bcd44aac70 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -386,17 +386,33 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 	struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) };
 	unsigned int h = helper_hash(&me->tuple);
 	struct nf_conntrack_helper *cur;
-	int ret = 0;
+	int ret = 0, i;
 
 	BUG_ON(me->expect_policy == NULL);
 	BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
 	BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1);
 
 	mutex_lock(&nf_ct_helper_mutex);
-	hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
-		if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) {
-			ret = -EEXIST;
-			goto out;
+	for (i = 0; i < nf_ct_helper_hsize; i++) {
+		hlist_for_each_entry(cur, &nf_ct_helper_hash[i], hnode) {
+			if (!strcmp(cur->name, me->name) &&
+			    (cur->tuple.src.l3num == NFPROTO_UNSPEC ||
+			     cur->tuple.src.l3num == me->tuple.src.l3num) &&
+			    cur->tuple.dst.protonum == me->tuple.dst.protonum) {
+				ret = -EEXIST;
+				goto out;
+			}
+		}
+	}
+
+	/* avoid unpredictable behaviour for auto_assign_helper */
+	if (!(me->flags & NF_CT_HELPER_F_USERSPACE)) {
+		hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
+			if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple,
+						     &mask)) {
+				ret = -EEXIST;
+				goto out;
+			}
 		}
 	}
 	hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
-- 
cgit 


From 54a5f9d9abab639039eb7288bdc26c9c67f4e79b Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sat, 15 Apr 2017 23:37:36 +0800
Subject: netfilter: nft_set_bitmap: free dummy elements when destroy the set

We forget to free dummy elements when deleting the set. So when I was
running nft-test.py, I saw many kmemleak warnings:
  kmemleak: 1344 new suspected memory leaks ...
  # cat /sys/kernel/debug/kmemleak
  unreferenced object 0xffff8800631345c8 (size 32):
  comm "nft", pid 9075, jiffies 4295743309 (age 1354.815s)
  hex dump (first 32 bytes):
    f8 63 13 63 00 88 ff ff 88 79 13 63 00 88 ff ff  .c.c.....y.c....
    04 0c 00 00 00 00 00 00 00 00 00 00 08 03 00 00  ................
  backtrace:
    [<ffffffff819059da>] kmemleak_alloc+0x4a/0xa0
    [<ffffffff81288174>] __kmalloc+0x164/0x310
    [<ffffffffa061269d>] nft_set_elem_init+0x3d/0x1b0 [nf_tables]
    [<ffffffffa06130da>] nft_add_set_elem+0x45a/0x8c0 [nf_tables]
    [<ffffffffa0613645>] nf_tables_newsetelem+0x105/0x1d0 [nf_tables]
    [<ffffffffa05fe6d4>] nfnetlink_rcv+0x414/0x770 [nfnetlink]
    [<ffffffff817f0ca6>] netlink_unicast+0x1f6/0x310
    [<ffffffff817f10c6>] netlink_sendmsg+0x306/0x3b0
  ...

Fixes: e920dde516088 ("netfilter: nft_set_bitmap: keep a list of dummy elements")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_bitmap.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 8ebbc2940f4c..b988162b5b15 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -257,6 +257,11 @@ static int nft_bitmap_init(const struct nft_set *set,
 
 static void nft_bitmap_destroy(const struct nft_set *set)
 {
+	struct nft_bitmap *priv = nft_set_priv(set);
+	struct nft_bitmap_elem *be, *n;
+
+	list_for_each_entry_safe(be, n, &priv->list, head)
+		nft_set_elem_destroy(set, be, true);
 }
 
 static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
-- 
cgit 


From 14e567615679a9999ce6bf4f23d6c9e00f03e00e Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Mon, 17 Apr 2017 21:18:55 +0800
Subject: netfilter: ctnetlink: drop the incorrect cthelper module request

First, when creating a new ct, we will invoke request_module to try to
load the related inkernel cthelper. So there's no need to call the
request_module again when updating the ct helpinfo.

Second, ctnetlink_change_helper may be called with rcu_read_lock held,
i.e. rcu_read_lock -> nfqnl_recv_verdict -> nfqnl_ct_parse ->
ctnetlink_glue_parse -> ctnetlink_glue_parse_ct ->
ctnetlink_change_helper. But the request_module invocation may sleep,
so we can't call it with the rcu_read_lock held.

Remove it now.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dc7dfd68fafe..48c184552de0 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1512,23 +1512,8 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
 
 	helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
 					    nf_ct_protonum(ct));
-	if (helper == NULL) {
-#ifdef CONFIG_MODULES
-		spin_unlock_bh(&nf_conntrack_expect_lock);
-
-		if (request_module("nfct-helper-%s", helpname) < 0) {
-			spin_lock_bh(&nf_conntrack_expect_lock);
-			return -EOPNOTSUPP;
-		}
-
-		spin_lock_bh(&nf_conntrack_expect_lock);
-		helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
-						    nf_ct_protonum(ct));
-		if (helper)
-			return -EAGAIN;
-#endif
+	if (helper == NULL)
 		return -EOPNOTSUPP;
-	}
 
 	if (help) {
 		if (help->helper == helper) {
-- 
cgit 


From 88be4c09d9008f9ff337cbf48c5d0f06c8f872e7 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Mon, 17 Apr 2017 21:18:56 +0800
Subject: netfilter: ctnetlink: fix deadlock due to acquire _expect_lock twice

Currently, ctnetlink_change_conntrack is always protected by _expect_lock,
but this will cause a deadlock when deleting the helper from a conntrack,
as the _expect_lock will be acquired again by nf_ct_remove_expectations:

         CPU0
        ----
  lock(nf_conntrack_expect_lock);
  lock(nf_conntrack_expect_lock);

  *** DEADLOCK ***
  May be due to missing lock nesting notation

  2 locks held by lt-conntrack_gr/12853:
  #0:  (&table[i].mutex){+.+.+.}, at: [<ffffffffa05e2009>]
       nfnetlink_rcv_msg+0x399/0x6a9 [nfnetlink]
  #1:  (nf_conntrack_expect_lock){+.....}, at: [<ffffffffa05f2c1f>]
       ctnetlink_new_conntrack+0x17f/0x408 [nf_conntrack_netlink]

  Call Trace:
   dump_stack+0x85/0xc2
   __lock_acquire+0x1608/0x1680
   ? ctnetlink_parse_tuple_proto+0x10f/0x1c0 [nf_conntrack_netlink]
   lock_acquire+0x100/0x1f0
   ? nf_ct_remove_expectations+0x32/0x90 [nf_conntrack]
   _raw_spin_lock_bh+0x3f/0x50
   ? nf_ct_remove_expectations+0x32/0x90 [nf_conntrack]
   nf_ct_remove_expectations+0x32/0x90 [nf_conntrack]
   ctnetlink_change_helper+0xc6/0x190 [nf_conntrack_netlink]
   ctnetlink_new_conntrack+0x1b2/0x408 [nf_conntrack_netlink]
   nfnetlink_rcv_msg+0x60a/0x6a9 [nfnetlink]
   ? nfnetlink_rcv_msg+0x1b9/0x6a9 [nfnetlink]
   ? nfnetlink_bind+0x1a0/0x1a0 [nfnetlink]
   netlink_rcv_skb+0xa4/0xc0
   nfnetlink_rcv+0x87/0x770 [nfnetlink]

Since the operations are unrelated to nf_ct_expect, so we can drop the
_expect_lock. Also note, after removing the _expect_lock protection,
another CPU may invoke nf_conntrack_helper_unregister, so we should
use rcu_read_lock to protect __nf_conntrack_helper_find invoked by
ctnetlink_change_helper.

Fixes: ca7433df3a67 ("netfilter: conntrack: seperate expect locking from nf_conntrack_lock")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 48c184552de0..e5f97777b1f4 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1510,23 +1510,29 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
 		return 0;
 	}
 
+	rcu_read_lock();
 	helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
 					    nf_ct_protonum(ct));
-	if (helper == NULL)
+	if (helper == NULL) {
+		rcu_read_unlock();
 		return -EOPNOTSUPP;
+	}
 
 	if (help) {
 		if (help->helper == helper) {
 			/* update private helper data if allowed. */
 			if (helper->from_nlattr)
 				helper->from_nlattr(helpinfo, ct);
-			return 0;
+			err = 0;
 		} else
-			return -EBUSY;
+			err = -EBUSY;
+	} else {
+		/* we cannot set a helper for an existing conntrack */
+		err = -EOPNOTSUPP;
 	}
 
-	/* we cannot set a helper for an existing conntrack */
-	return -EOPNOTSUPP;
+	rcu_read_unlock();
+	return err;
 }
 
 static int ctnetlink_change_timeout(struct nf_conn *ct,
@@ -1945,9 +1951,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
 	err = -EEXIST;
 	ct = nf_ct_tuplehash_to_ctrack(h);
 	if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
-		spin_lock_bh(&nf_conntrack_expect_lock);
 		err = ctnetlink_change_conntrack(ct, cda);
-		spin_unlock_bh(&nf_conntrack_expect_lock);
 		if (err == 0) {
 			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
 						      (1 << IPCT_ASSURED) |
@@ -2342,11 +2346,7 @@ ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct)
 	if (ret < 0)
 		return ret;
 
-	spin_lock_bh(&nf_conntrack_expect_lock);
-	ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
-	spin_unlock_bh(&nf_conntrack_expect_lock);
-
-	return ret;
+	return ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
 }
 
 static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda,
-- 
cgit 


From 53b56da83d7899de375a9de153fd7f5397de85e6 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Mon, 17 Apr 2017 21:18:57 +0800
Subject: netfilter: ctnetlink: make it safer when updating ct->status

After converting to use rcu for conntrack hash, one CPU may update
the ct->status via ctnetlink, while another CPU may process the
packets and update the ct->status.

So the non-atomic operation "ct->status |= status;" via ctnetlink
becomes unsafe, and this may clear the IPS_DYING_BIT bit set by
another CPU unexpectedly. For example:
         CPU0                            CPU1
  ctnetlink_change_status        __nf_conntrack_find_get
      old = ct->status              nf_ct_gc_expired
          -                         nf_ct_kill
          -                      test_and_set_bit(IPS_DYING_BIT
      new = old | status;                 -
  ct->status = new; <-- oops, _DYING_ is cleared!

Now using a series of atomic bit operation to solve the above issue.

Also note, user shouldn't set IPS_TEMPLATE, IPS_SEQ_ADJUST directly,
so make these two bits be unchangable too.

If we set the IPS_TEMPLATE_BIT, ct will be freed by nf_ct_tmpl_free,
but actually it is alloced by nf_conntrack_alloc.
If we set the IPS_SEQ_ADJUST_BIT, this may cause the NULL pointer
deference, as the nfct_seqadj(ct) maybe NULL.

Last, add some comments to describe the logic change due to the
commit a963d710f367 ("netfilter: ctnetlink: Fix regression in CTA_STATUS
processing"), which makes me feel a little confusing.

Fixes: 76507f69c44e ("[NETFILTER]: nf_conntrack: use RCU for conntrack hash")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index e5f97777b1f4..86deed6a8db4 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1419,6 +1419,24 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 }
 #endif
 
+static void
+__ctnetlink_change_status(struct nf_conn *ct, unsigned long on,
+			  unsigned long off)
+{
+	unsigned int bit;
+
+	/* Ignore these unchangable bits */
+	on &= ~IPS_UNCHANGEABLE_MASK;
+	off &= ~IPS_UNCHANGEABLE_MASK;
+
+	for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
+		if (on & (1 << bit))
+			set_bit(bit, &ct->status);
+		else if (off & (1 << bit))
+			clear_bit(bit, &ct->status);
+	}
+}
+
 static int
 ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
 {
@@ -1438,10 +1456,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
 		/* ASSURED bit can only be set */
 		return -EBUSY;
 
-	/* Be careful here, modifying NAT bits can screw up things,
-	 * so don't let users modify them directly if they don't pass
-	 * nf_nat_range. */
-	ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
+	__ctnetlink_change_status(ct, status, 0);
 	return 0;
 }
 
@@ -1628,7 +1643,7 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
 		if (ret < 0)
 			return ret;
 
-		ct->status |= IPS_SEQ_ADJUST;
+		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
 	}
 
 	if (cda[CTA_SEQ_ADJ_REPLY]) {
@@ -1637,7 +1652,7 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
 		if (ret < 0)
 			return ret;
 
-		ct->status |= IPS_SEQ_ADJUST;
+		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
 	}
 
 	return 0;
@@ -2289,10 +2304,10 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[])
 	/* This check is less strict than ctnetlink_change_status()
 	 * because callers often flip IPS_EXPECTED bits when sending
 	 * an NFQA_CT attribute to the kernel.  So ignore the
-	 * unchangeable bits but do not error out.
+	 * unchangeable bits but do not error out. Also user programs
+	 * are allowed to clear the bits that they are allowed to change.
 	 */
-	ct->status = (status & ~IPS_UNCHANGEABLE_MASK) |
-		     (ct->status & IPS_UNCHANGEABLE_MASK);
+	__ctnetlink_change_status(ct, status, ~status);
 	return 0;
 }
 
-- 
cgit 


From 64f3967c7aacbfa1f0614cdb1a23e3f7e76eb61b Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Mon, 17 Apr 2017 21:18:58 +0800
Subject: netfilter: ctnetlink: acquire ct->lock before operating nf_ct_seqadj

We should acquire the ct->lock before accessing or modifying the
nf_ct_seqadj, as another CPU may modify the nf_ct_seqadj at the same
time during its packet proccessing.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 86deed6a8db4..78f8c9adbd3c 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -417,8 +417,7 @@ nla_put_failure:
 	return -1;
 }
 
-static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
-				     const struct nf_conn *ct)
+static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct)
 {
 	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
 	struct nf_ct_seqadj *seq;
@@ -426,15 +425,20 @@ static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
 	if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj)
 		return 0;
 
+	spin_lock_bh(&ct->lock);
 	seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
 	if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1)
-		return -1;
+		goto err;
 
 	seq = &seqadj->seq[IP_CT_DIR_REPLY];
 	if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1)
-		return -1;
+		goto err;
 
+	spin_unlock_bh(&ct->lock);
 	return 0;
+err:
+	spin_unlock_bh(&ct->lock);
+	return -1;
 }
 
 static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
@@ -1637,11 +1641,12 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
 	if (!seqadj)
 		return 0;
 
+	spin_lock_bh(&ct->lock);
 	if (cda[CTA_SEQ_ADJ_ORIG]) {
 		ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
 				     cda[CTA_SEQ_ADJ_ORIG]);
 		if (ret < 0)
-			return ret;
+			goto err;
 
 		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
 	}
@@ -1650,12 +1655,16 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
 		ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
 				     cda[CTA_SEQ_ADJ_REPLY]);
 		if (ret < 0)
-			return ret;
+			goto err;
 
 		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
 	}
 
+	spin_unlock_bh(&ct->lock);
 	return 0;
+err:
+	spin_unlock_bh(&ct->lock);
+	return ret;
 }
 
 static int
-- 
cgit 


From 6bd3d1929246bb0cf4f3bd6e25a0ba442b0de845 Mon Sep 17 00:00:00 2001
From: Peter Tirsek <peter@tirsek.com>
Date: Tue, 18 Apr 2017 12:39:58 -0500
Subject: netfilter: xt_socket: Fix broken IPv6 handling

Commit 834184b1f3a4 ("netfilter: defrag: only register defrag
functionality if needed") used the outdated XT_SOCKET_HAVE_IPV6 macro
which was removed earlier in commit 8db4c5be88f6 ("netfilter: move
socket lookup infrastructure to nf_socket_ipv{4,6}.c"). With that macro
never being defined, the xt_socket match emits an "Unknown family 10"
warning when used with IPv6:

WARNING: CPU: 0 PID: 1377 at net/netfilter/xt_socket.c:160 socket_mt_enable_defrag+0x47/0x50 [xt_socket]
Unknown family 10
Modules linked in: xt_socket nf_socket_ipv4 nf_socket_ipv6 nf_defrag_ipv4 [...]
CPU: 0 PID: 1377 Comm: ip6tables-resto Not tainted 4.10.10 #1
Hardware name: [...]
Call Trace:
? __warn+0xe7/0x100
? socket_mt_enable_defrag+0x47/0x50 [xt_socket]
? socket_mt_enable_defrag+0x47/0x50 [xt_socket]
? warn_slowpath_fmt+0x39/0x40
? socket_mt_enable_defrag+0x47/0x50 [xt_socket]
? socket_mt_v2_check+0x12/0x40 [xt_socket]
? xt_check_match+0x6b/0x1a0 [x_tables]
? xt_find_match+0x93/0xd0 [x_tables]
? xt_request_find_match+0x20/0x80 [x_tables]
? translate_table+0x48e/0x870 [ip6_tables]
? translate_table+0x577/0x870 [ip6_tables]
? walk_component+0x3a/0x200
? kmalloc_order+0x1d/0x50
? do_ip6t_set_ctl+0x181/0x490 [ip6_tables]
? filename_lookup+0xa5/0x120
? nf_setsockopt+0x3a/0x60
? ipv6_setsockopt+0xb0/0xc0
? sock_common_setsockopt+0x23/0x30
? SyS_socketcall+0x41d/0x630
? vfs_read+0xfa/0x120
? do_fast_syscall_32+0x7a/0x110
? entry_SYSENTER_32+0x47/0x71

This patch brings the conditional back in line with how the rest of the
file handles IPv6.

Fixes: 834184b1f3a4 ("netfilter: defrag: only register defrag functionality if needed")
Signed-off-by: Peter Tirsek <peter@tirsek.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 770bbec878f1..e75ef39669c5 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -152,7 +152,7 @@ static int socket_mt_enable_defrag(struct net *net, int family)
 	switch (family) {
 	case NFPROTO_IPV4:
 		return nf_defrag_ipv4_enable(net);
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 	case NFPROTO_IPV6:
 		return nf_defrag_ipv6_enable(net);
 #endif
-- 
cgit 


From 277a292835c196894ef895d5e1fd6170bb916f55 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sun, 23 Apr 2017 18:29:30 +0800
Subject: netfilter: nft_dynset: continue to next expr if _OP_ADD succeeded

Currently, after adding the following nft rules:
  # nft add set x target1 { type ipv4_addr \; flags timeout \;}
  # nft add rule x y set add ip daddr timeout 1d @target1 counter

the counters will always be zero despite of the elements are added
to the dynamic set "target1" or not, as we will break the nft expr
traversal unconditionally:
  # nft list ruleset
  ...
  set target1 {
      ...
      elements = { 8.8.8.8 expires 23h59m53s}
  }
  chain output {
      ...
      set add ip daddr timeout 1d @target1 counter packets 0 bytes 0
                                                           ^       ^
      ...
  }

Since we add the elements to the set successfully, we should continue
to the next expression.

Additionally, if elements are added to "flow table" successfully, we
will _always_ continue to the next expr, even if the operation is
_OP_ADD. So it's better to keep them to be consistent.

Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates")
Reported-by: Robert White <rwhite@pobox.com>
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_dynset.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 049ad2d9ee66..fafbeea3ed04 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -82,8 +82,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
 		    nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
 			timeout = priv->timeout ? : set->timeout;
 			*nft_set_ext_expiration(ext) = jiffies + timeout;
-		} else if (sexpr == NULL)
-			goto out;
+		}
 
 		if (sexpr != NULL)
 			sexpr->ops->eval(sexpr, regs, pkt);
@@ -92,7 +91,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
 			regs->verdict.code = NFT_BREAK;
 		return;
 	}
-out:
+
 	if (!priv->invert)
 		regs->verdict.code = NFT_BREAK;
 }
-- 
cgit 


From 1442f6f7c1b77de1c508318164a527e240c24a4d Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 20 Apr 2017 11:44:16 +0200
Subject: ipvs: explicitly forbid ipv6 service/dest creation if ipv6 mod is
 disabled

When creating a new ipvs service, ipv6 addresses are always accepted
if CONFIG_IP_VS_IPV6 is enabled. On dest creation the address family
is not explicitly checked.

This allows the user-space to configure ipvs services even if the
system is booted with ipv6.disable=1. On specific configuration, ipvs
can try to call ipv6 routing code at setup time, causing the kernel to
oops due to fib6_rules_ops being NULL.

This change addresses the issue adding a check for the ipv6
module being enabled while validating ipv6 service operations and
adding the same validation for dest operations.

According to git history, this issue is apparently present since
the introduction of ipv6 support, and the oops can be triggered
since commit 09571c7ae30865ad ("IPVS: Add function to determine
if IPv6 address is local")

Fixes: 09571c7ae30865ad ("IPVS: Add function to determine if IPv6 address is local")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5aeb0dde6ccc..4d753beaac32 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3078,6 +3078,17 @@ nla_put_failure:
 	return skb->len;
 }
 
+static bool ip_vs_is_af_valid(int af)
+{
+	if (af == AF_INET)
+		return true;
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6 && ipv6_mod_enabled())
+		return true;
+#endif
+	return false;
+}
+
 static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
 				    struct ip_vs_service_user_kern *usvc,
 				    struct nlattr *nla, int full_entry,
@@ -3104,11 +3115,7 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
 	memset(usvc, 0, sizeof(*usvc));
 
 	usvc->af = nla_get_u16(nla_af);
-#ifdef CONFIG_IP_VS_IPV6
-	if (usvc->af != AF_INET && usvc->af != AF_INET6)
-#else
-	if (usvc->af != AF_INET)
-#endif
+	if (!ip_vs_is_af_valid(usvc->af))
 		return -EAFNOSUPPORT;
 
 	if (nla_fwmark) {
@@ -3610,6 +3617,11 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 		if (udest.af == 0)
 			udest.af = svc->af;
 
+		if (!ip_vs_is_af_valid(udest.af)) {
+			ret = -EAFNOSUPPORT;
+			goto out;
+		}
+
 		if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
 			/* The synchronization protocol is incompatible
 			 * with mixed family services
-- 
cgit 


From 7dde07e9c53617549d67dd3e1d791496d0d3868e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 28 Apr 2017 15:57:56 +0300
Subject: netfilter: x_tables: unlock on error in xt_find_table_lock()

According to my static checker we should unlock here before the return.
That seems reasonable to me as well.

Fixes" b9e69e127397 ("netfilter: xtables: don't hook tables by default")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/x_tables.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 14857afc9937..f134d384852f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1051,8 +1051,10 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 	list_for_each_entry(t, &init_net.xt.tables[af], list) {
 		if (strcmp(t->name, name))
 			continue;
-		if (!try_module_get(t->me))
+		if (!try_module_get(t->me)) {
+			mutex_unlock(&xt[af].mutex);
 			return NULL;
+		}
 
 		mutex_unlock(&xt[af].mutex);
 		if (t->table_init(net) != 0) {
-- 
cgit 


From 9744a6fcefcb4d56501d69adb04c24559d353cad Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 1 May 2017 12:58:50 +0200
Subject: netfilter: nf_tables: check if same extensions are set when adding
 elements

If no NLM_F_EXCL is set and the element already exists in the set, make
sure that both elements have the same extensions.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 434c739dfeca..11a96e8dd3cd 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3749,6 +3749,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	err = set->ops->insert(ctx->net, set, &elem, &ext2);
 	if (err) {
 		if (err == -EEXIST) {
+			if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
+			    nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
+			    nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
+			    nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF))
+				return -EBUSY;
 			if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
 			     nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
 			     memcmp(nft_set_ext_data(ext),
-- 
cgit