From 46f7487e161b195a1bd7ddbd9c6aba9c93ec881a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 6 Mar 2019 12:44:17 +0100
Subject: netfilter: nat: don't register device notifier twice

Otherwise, we get notifier list corruption.

This is the most simple fix: remove the device notifier call chain
from the ipv6 masquerade register function and handle it only
in the ipv4 version.

The better fix is merge
nf_nat_masquerade_ipv4/6_(un)register_notifier
  into a single
nf_nat_masquerade_(un)register_notifiers

but to do this its needed to first merge the two masquerade modules
into a single xt_MASQUERADE.

Furthermore, we need to use different refcounts for ipv4/ipv6
until we can merge MASQUERADE.

Fixes: d1aca8ab3104a ("netfilter: nat: merge ipv4 and ipv6 masquerade functionality")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_nat_masquerade.c | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 86fa4dcc63c5..d85c4d902e7b 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -11,7 +11,8 @@
 #include <net/netfilter/ipv6/nf_nat_masquerade.h>
 
 static DEFINE_MUTEX(masq_mutex);
-static unsigned int masq_refcnt __read_mostly;
+static unsigned int masq_refcnt4 __read_mostly;
+static unsigned int masq_refcnt6 __read_mostly;
 
 unsigned int
 nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
@@ -141,8 +142,13 @@ int nf_nat_masquerade_ipv4_register_notifier(void)
 	int ret = 0;
 
 	mutex_lock(&masq_mutex);
+	if (WARN_ON_ONCE(masq_refcnt4 == UINT_MAX)) {
+		ret = -EOVERFLOW;
+		goto out_unlock;
+	}
+
 	/* check if the notifier was already set */
-	if (++masq_refcnt > 1)
+	if (++masq_refcnt4 > 1)
 		goto out_unlock;
 
 	/* Register for device down reports */
@@ -160,7 +166,7 @@ int nf_nat_masquerade_ipv4_register_notifier(void)
 err_unregister:
 	unregister_netdevice_notifier(&masq_dev_notifier);
 err_dec:
-	masq_refcnt--;
+	masq_refcnt4--;
 out_unlock:
 	mutex_unlock(&masq_mutex);
 	return ret;
@@ -171,7 +177,7 @@ void nf_nat_masquerade_ipv4_unregister_notifier(void)
 {
 	mutex_lock(&masq_mutex);
 	/* check if the notifier still has clients */
-	if (--masq_refcnt > 0)
+	if (--masq_refcnt4 > 0)
 		goto out_unlock;
 
 	unregister_netdevice_notifier(&masq_dev_notifier);
@@ -321,25 +327,23 @@ int nf_nat_masquerade_ipv6_register_notifier(void)
 	int ret = 0;
 
 	mutex_lock(&masq_mutex);
-	/* check if the notifier is already set */
-	if (++masq_refcnt > 1)
+	if (WARN_ON_ONCE(masq_refcnt6 == UINT_MAX)) {
+		ret = -EOVERFLOW;
 		goto out_unlock;
+	}
 
-	ret = register_netdevice_notifier(&masq_dev_notifier);
-	if (ret)
-		goto err_dec;
+	/* check if the notifier is already set */
+	if (++masq_refcnt6 > 1)
+		goto out_unlock;
 
 	ret = register_inet6addr_notifier(&masq_inet6_notifier);
 	if (ret)
-		goto err_unregister;
+		goto err_dec;
 
 	mutex_unlock(&masq_mutex);
 	return ret;
-
-err_unregister:
-	unregister_netdevice_notifier(&masq_dev_notifier);
 err_dec:
-	masq_refcnt--;
+	masq_refcnt6--;
 out_unlock:
 	mutex_unlock(&masq_mutex);
 	return ret;
@@ -350,11 +354,10 @@ void nf_nat_masquerade_ipv6_unregister_notifier(void)
 {
 	mutex_lock(&masq_mutex);
 	/* check if the notifier still has clients */
-	if (--masq_refcnt > 0)
+	if (--masq_refcnt6 > 0)
 		goto out_unlock;
 
 	unregister_inet6addr_notifier(&masq_inet6_notifier);
-	unregister_netdevice_notifier(&masq_dev_notifier);
 out_unlock:
 	mutex_unlock(&masq_mutex);
 }
-- 
cgit 


From 40ba1d9b4d19796afc9b7ece872f5f3e8f5e2c13 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 8 Mar 2019 00:58:53 +0100
Subject: netfilter: nf_tables: fix set double-free in abort path

The abort path can cause a double-free of an anonymous set.
Added-and-to-be-aborted rule looks like this:

udp dport { 137, 138 } drop

The to-be-aborted transaction list looks like this:

newset
newsetelem
newsetelem
rule

This gets walked in reverse order, so first pass disables the rule, the
set elements, then the set.

After synchronize_rcu(), we then destroy those in same order: rule, set
element, set element, newset.

Problem is that the anonymous set has already been bound to the rule, so
the rule (lookup expression destructor) already frees the set, when then
cause use-after-free when trying to delete the elements from this set,
then try to free the set again when handling the newset expression.

Rule releases the bound set in first place from the abort path, this
causes the use-after-free on set element removal when undoing the new
element transactions. To handle this, skip new element transaction if
set is bound from the abort path.

This is still causes the use-after-free on set element removal.  To
handle this, remove transaction from the list when the set is already
bound.

Joint work with Florian Westphal.

Fixes: f6ac85858976 ("netfilter: nf_tables: unbind set in rule from commit path")
Bugzilla: https://bugzilla.netfilter.org/show_bug.cgi?id=1325
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index faf6bd10a19f..1333bf97dc26 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -142,7 +142,7 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
 	list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
 		if (trans->msg_type == NFT_MSG_NEWSET &&
 		    nft_trans_set(trans) == set) {
-			nft_trans_set_bound(trans) = true;
+			set->bound = true;
 			break;
 		}
 	}
@@ -6709,8 +6709,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
 		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
 		break;
 	case NFT_MSG_NEWSET:
-		if (!nft_trans_set_bound(trans))
-			nft_set_destroy(nft_trans_set(trans));
+		nft_set_destroy(nft_trans_set(trans));
 		break;
 	case NFT_MSG_NEWSETELEM:
 		nft_set_elem_destroy(nft_trans_elem_set(trans),
@@ -6783,8 +6782,11 @@ static int __nf_tables_abort(struct net *net)
 			break;
 		case NFT_MSG_NEWSET:
 			trans->ctx.table->use--;
-			if (!nft_trans_set_bound(trans))
-				list_del_rcu(&nft_trans_set(trans)->list);
+			if (nft_trans_set(trans)->bound) {
+				nft_trans_destroy(trans);
+				break;
+			}
+			list_del_rcu(&nft_trans_set(trans)->list);
 			break;
 		case NFT_MSG_DELSET:
 			trans->ctx.table->use++;
@@ -6792,8 +6794,11 @@ static int __nf_tables_abort(struct net *net)
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWSETELEM:
+			if (nft_trans_elem_set(trans)->bound) {
+				nft_trans_destroy(trans);
+				break;
+			}
 			te = (struct nft_trans_elem *)trans->data;
-
 			te->set->ops->remove(net, te->set, &te->elem);
 			atomic_dec(&te->set->nelems);
 			break;
-- 
cgit 


From 273fe3f1006ea5ebc63d6729e43e8e45e32b256a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 8 Mar 2019 15:30:03 +0100
Subject: netfilter: nf_tables: bogus EBUSY when deleting set after flush
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Set deletion after flush coming in the same batch results in EBUSY. Add
set use counter to track the number of references to this set from
rules. We cannot rely on the list of bindings for this since such list
is still populated from the preparation phase.

Reported-by: Václav Zindulka <vaclav.zindulka@tlapnet.cz>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 28 +++++++++++++++++++++++++++-
 net/netfilter/nft_dynset.c    | 13 +++++++++----
 net/netfilter/nft_lookup.c    | 13 +++++++++----
 net/netfilter/nft_objref.c    | 13 +++++++++----
 4 files changed, 54 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 1333bf97dc26..ca09ef037ca5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3672,6 +3672,9 @@ err1:
 
 static void nft_set_destroy(struct nft_set *set)
 {
+	if (WARN_ON(set->use > 0))
+		return;
+
 	set->ops->destroy(set);
 	module_put(to_set_type(set->ops)->owner);
 	kfree(set->name);
@@ -3712,7 +3715,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
 		NL_SET_BAD_ATTR(extack, attr);
 		return PTR_ERR(set);
 	}
-	if (!list_empty(&set->bindings) ||
+	if (set->use ||
 	    (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) {
 		NL_SET_BAD_ATTR(extack, attr);
 		return -EBUSY;
@@ -3742,6 +3745,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 	struct nft_set_binding *i;
 	struct nft_set_iter iter;
 
+	if (set->use == UINT_MAX)
+		return -EOVERFLOW;
+
 	if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
 		return -EBUSY;
 
@@ -3769,6 +3775,7 @@ bind:
 	binding->chain = ctx->chain;
 	list_add_tail_rcu(&binding->list, &set->bindings);
 	nft_set_trans_bind(ctx, set);
+	set->use++;
 
 	return 0;
 }
@@ -3788,6 +3795,25 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
 }
 EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
 
+void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+			      struct nft_set_binding *binding,
+			      enum nft_trans_phase phase)
+{
+	switch (phase) {
+	case NFT_TRANS_PREPARE:
+		set->use--;
+		return;
+	case NFT_TRANS_ABORT:
+	case NFT_TRANS_RELEASE:
+		set->use--;
+		/* fall through */
+	default:
+		nf_tables_unbind_set(ctx, set, binding,
+				     phase == NFT_TRANS_COMMIT);
+	}
+}
+EXPORT_SYMBOL_GPL(nf_tables_deactivate_set);
+
 void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
 {
 	if (list_empty(&set->bindings) && nft_set_is_anonymous(set))
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index a8a74a16f9c4..e461007558e8 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -240,11 +240,15 @@ static void nft_dynset_deactivate(const struct nft_ctx *ctx,
 {
 	struct nft_dynset *priv = nft_expr_priv(expr);
 
-	if (phase == NFT_TRANS_PREPARE)
-		return;
+	nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_dynset_activate(const struct nft_ctx *ctx,
+				const struct nft_expr *expr)
+{
+	struct nft_dynset *priv = nft_expr_priv(expr);
 
-	nf_tables_unbind_set(ctx, priv->set, &priv->binding,
-			     phase == NFT_TRANS_COMMIT);
+	priv->set->use++;
 }
 
 static void nft_dynset_destroy(const struct nft_ctx *ctx,
@@ -292,6 +296,7 @@ static const struct nft_expr_ops nft_dynset_ops = {
 	.eval		= nft_dynset_eval,
 	.init		= nft_dynset_init,
 	.destroy	= nft_dynset_destroy,
+	.activate	= nft_dynset_activate,
 	.deactivate	= nft_dynset_deactivate,
 	.dump		= nft_dynset_dump,
 };
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 14496da5141d..161c3451a747 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -127,11 +127,15 @@ static void nft_lookup_deactivate(const struct nft_ctx *ctx,
 {
 	struct nft_lookup *priv = nft_expr_priv(expr);
 
-	if (phase == NFT_TRANS_PREPARE)
-		return;
+	nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_lookup_activate(const struct nft_ctx *ctx,
+				const struct nft_expr *expr)
+{
+	struct nft_lookup *priv = nft_expr_priv(expr);
 
-	nf_tables_unbind_set(ctx, priv->set, &priv->binding,
-			     phase == NFT_TRANS_COMMIT);
+	priv->set->use++;
 }
 
 static void nft_lookup_destroy(const struct nft_ctx *ctx,
@@ -222,6 +226,7 @@ static const struct nft_expr_ops nft_lookup_ops = {
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
 	.eval		= nft_lookup_eval,
 	.init		= nft_lookup_init,
+	.activate	= nft_lookup_activate,
 	.deactivate	= nft_lookup_deactivate,
 	.destroy	= nft_lookup_destroy,
 	.dump		= nft_lookup_dump,
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 79ef074c18ca..457a9ceb46af 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -162,11 +162,15 @@ static void nft_objref_map_deactivate(const struct nft_ctx *ctx,
 {
 	struct nft_objref_map *priv = nft_expr_priv(expr);
 
-	if (phase == NFT_TRANS_PREPARE)
-		return;
+	nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_objref_map_activate(const struct nft_ctx *ctx,
+				    const struct nft_expr *expr)
+{
+	struct nft_objref_map *priv = nft_expr_priv(expr);
 
-	nf_tables_unbind_set(ctx, priv->set, &priv->binding,
-			     phase == NFT_TRANS_COMMIT);
+	priv->set->use++;
 }
 
 static void nft_objref_map_destroy(const struct nft_ctx *ctx,
@@ -183,6 +187,7 @@ static const struct nft_expr_ops nft_objref_map_ops = {
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
 	.eval		= nft_objref_map_eval,
 	.init		= nft_objref_map_init,
+	.activate	= nft_objref_map_activate,
 	.deactivate	= nft_objref_map_deactivate,
 	.destroy	= nft_objref_map_destroy,
 	.dump		= nft_objref_map_dump,
-- 
cgit 


From 3f3a390dbd59d236f62cff8e8b20355ef7069e3d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 11 Mar 2019 13:04:16 +0100
Subject: netfilter: nf_tables: use-after-free in dynamic operations

Smatch reports:

       net/netfilter/nf_tables_api.c:2167 nf_tables_expr_destroy()
        error: dereferencing freed memory 'expr->ops'

net/netfilter/nf_tables_api.c
    2162 static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
    2163                                   struct nft_expr *expr)
    2164 {
    2165        if (expr->ops->destroy)
    2166                expr->ops->destroy(ctx, expr);
                                                ^^^^
--> 2167        module_put(expr->ops->type->owner);
                           ^^^^^^^^^
    2168 }

Smatch says there are three functions which free expr->ops.

Fixes: b8e204006340 ("netfilter: nft_compat: use .release_ops and remove list of extension")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ca09ef037ca5..9d8f51dfc593 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2162,9 +2162,11 @@ err1:
 static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
 				   struct nft_expr *expr)
 {
+	const struct nft_expr_type *type = expr->ops->type;
+
 	if (expr->ops->destroy)
 		expr->ops->destroy(ctx, expr);
-	module_put(expr->ops->type->owner);
+	module_put(type->owner);
 }
 
 struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
-- 
cgit 


From a843dc4ebaecd15fca1f4d35a97210f72ea1473b Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Mon, 11 Mar 2019 16:29:32 +0800
Subject: net: sit: fix UBSAN Undefined behaviour in check_6rd

In func check_6rd,tunnel->ip6rd.relay_prefixlen may equal to
32,so UBSAN complain about it.

UBSAN: Undefined behaviour in net/ipv6/sit.c:781:47
shift exponent 32 is too large for 32-bit type 'unsigned int'
CPU: 6 PID: 20036 Comm: syz-executor.0 Not tainted 4.19.27 #2
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1
04/01/2014
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0xca/0x13e lib/dump_stack.c:113
ubsan_epilogue+0xe/0x81 lib/ubsan.c:159
__ubsan_handle_shift_out_of_bounds+0x293/0x2e8 lib/ubsan.c:425
check_6rd.constprop.9+0x433/0x4e0 net/ipv6/sit.c:781
try_6rd net/ipv6/sit.c:806 [inline]
ipip6_tunnel_xmit net/ipv6/sit.c:866 [inline]
sit_tunnel_xmit+0x141c/0x2720 net/ipv6/sit.c:1033
__netdev_start_xmit include/linux/netdevice.h:4300 [inline]
netdev_start_xmit include/linux/netdevice.h:4309 [inline]
xmit_one net/core/dev.c:3243 [inline]
dev_hard_start_xmit+0x17c/0x780 net/core/dev.c:3259
__dev_queue_xmit+0x1656/0x2500 net/core/dev.c:3829
neigh_output include/net/neighbour.h:501 [inline]
ip6_finish_output2+0xa36/0x2290 net/ipv6/ip6_output.c:120
ip6_finish_output+0x3e7/0xa20 net/ipv6/ip6_output.c:154
NF_HOOK_COND include/linux/netfilter.h:278 [inline]
ip6_output+0x1e2/0x720 net/ipv6/ip6_output.c:171
dst_output include/net/dst.h:444 [inline]
ip6_local_out+0x99/0x170 net/ipv6/output_core.c:176
ip6_send_skb+0x9d/0x2f0 net/ipv6/ip6_output.c:1697
ip6_push_pending_frames+0xc0/0x100 net/ipv6/ip6_output.c:1717
rawv6_push_pending_frames net/ipv6/raw.c:616 [inline]
rawv6_sendmsg+0x2435/0x3530 net/ipv6/raw.c:946
inet_sendmsg+0xf8/0x5c0 net/ipv4/af_inet.c:798
sock_sendmsg_nosec net/socket.c:621 [inline]
sock_sendmsg+0xc8/0x110 net/socket.c:631
___sys_sendmsg+0x6cf/0x890 net/socket.c:2114
__sys_sendmsg+0xf0/0x1b0 net/socket.c:2152
do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:290
entry_SYSCALL_64_after_hwframe+0x49/0xbe

Signed-off-by: linmiaohe <linmiaohe@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 09e440e8dfae..07e21a82ce4c 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -778,8 +778,9 @@ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
 		pbw0 = tunnel->ip6rd.prefixlen >> 5;
 		pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
 
-		d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
-		    tunnel->ip6rd.relay_prefixlen;
+		d = tunnel->ip6rd.relay_prefixlen < 32 ?
+			(ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
+		    tunnel->ip6rd.relay_prefixlen : 0;
 
 		pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
 		if (pbi1 > 0)
-- 
cgit 


From b8b27498659c65034032af79842913844a6cc79a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 7 Mar 2019 23:20:11 +0100
Subject: netfilter: nf_tables: return immediately on empty commit

When running 'nft flush ruleset' while no rules exist, we will increment
the generation counter and announce a new genid to userspace, yet
nothing had changed in the first place.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 9d8f51dfc593..513f93118604 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6564,6 +6564,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 	struct nft_chain *chain;
 	struct nft_table *table;
 
+	if (list_empty(&net->nft.commit_list)) {
+		mutex_unlock(&net->nft.commit_mutex);
+		return 0;
+	}
+
 	/* 0. Validate ruleset, otherwise roll back for error reporting. */
 	if (nf_tables_validate(net) < 0)
 		return -EAGAIN;
-- 
cgit 


From f2feaefdabb0a6253aa020f65e7388f07a9ed47c Mon Sep 17 00:00:00 2001
From: Christoph Paasch <cpaasch@apple.com>
Date: Mon, 11 Mar 2019 11:41:05 -0700
Subject: tcp: Don't access TCP_SKB_CB before initializing it

Since commit eeea10b83a13 ("tcp: add
tcp_v4_fill_cb()/tcp_v4_restore_cb()"), tcp_vX_fill_cb is only called
after tcp_filter(). That means, TCP_SKB_CB(skb)->end_seq still points to
the IP-part of the cb.

We thus should not mock with it, as this can trigger bugs (thanks
syzkaller):
[   12.349396] ==================================================================
[   12.350188] BUG: KASAN: slab-out-of-bounds in ip6_datagram_recv_specific_ctl+0x19b3/0x1a20
[   12.351035] Read of size 1 at addr ffff88006adbc208 by task test_ip6_datagr/1799

Setting end_seq is actually no more necessary in tcp_filter as it gets
initialized later on in tcp_vX_fill_cb.

Cc: Eric Dumazet <edumazet@google.com>
Fixes: eeea10b83a13 ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()")
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 831d844a27ca..277d71239d75 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1734,15 +1734,8 @@ EXPORT_SYMBOL(tcp_add_backlog);
 int tcp_filter(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcphdr *th = (struct tcphdr *)skb->data;
-	unsigned int eaten = skb->len;
-	int err;
 
-	err = sk_filter_trim_cap(sk, skb, th->doff * 4);
-	if (!err) {
-		eaten -= skb->len;
-		TCP_SKB_CB(skb)->end_seq -= eaten;
-	}
-	return err;
+	return sk_filter_trim_cap(sk, skb, th->doff * 4);
 }
 EXPORT_SYMBOL(tcp_filter);
 
-- 
cgit 


From ee74d0bd4325efb41e38affe5955f920ed973f23 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 11 Mar 2019 13:48:44 -0700
Subject: net/x25: reset state in x25_connect()

In case x25_connect() fails and frees the socket neighbour,
we also need to undo the change done to x25->state.

Before my last bug fix, we had use-after-free so this
patch fixes a latent bug.

syzbot report :

kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] PREEMPT SMP KASAN
CPU: 1 PID: 16137 Comm: syz-executor.1 Not tainted 5.0.0+ #117
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:x25_write_internal+0x1e8/0xdf0 net/x25/x25_subr.c:173
Code: 00 40 88 b5 e0 fe ff ff 0f 85 01 0b 00 00 48 8b 8b 80 04 00 00 48 ba 00 00 00 00 00 fc ff df 48 8d 79 1c 48 89 fe 48 c1 ee 03 <0f> b6 34 16 48 89 fa 83 e2 07 83 c2 03 40 38 f2 7c 09 40 84 f6 0f
RSP: 0018:ffff888076717a08 EFLAGS: 00010207
RAX: ffff88805f2f2292 RBX: ffff8880a0ae6000 RCX: 0000000000000000
kobject: 'loop5' (0000000018d0d0ee): kobject_uevent_env
RDX: dffffc0000000000 RSI: 0000000000000003 RDI: 000000000000001c
RBP: ffff888076717b40 R08: ffff8880950e0580 R09: ffffed100be5e46d
R10: ffffed100be5e46c R11: ffff88805f2f2363 R12: ffff888065579840
kobject: 'loop5' (0000000018d0d0ee): fill_kobj_path: path = '/devices/virtual/block/loop5'
R13: 1ffff1100ece2f47 R14: 0000000000000013 R15: 0000000000000013
FS:  00007fb88cf43700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f9a42a41028 CR3: 0000000087a67000 CR4: 00000000001406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 x25_release+0xd0/0x340 net/x25/af_x25.c:658
 __sock_release+0xd3/0x2b0 net/socket.c:579
 sock_close+0x1b/0x30 net/socket.c:1162
 __fput+0x2df/0x8d0 fs/file_table.c:278
 ____fput+0x16/0x20 fs/file_table.c:309
 task_work_run+0x14a/0x1c0 kernel/task_work.c:113
 get_signal+0x1961/0x1d50 kernel/signal.c:2388
 do_signal+0x87/0x1940 arch/x86/kernel/signal.c:816
 exit_to_usermode_loop+0x244/0x2c0 arch/x86/entry/common.c:162
 prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
 syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
 do_syscall_64+0x52d/0x610 arch/x86/entry/common.c:293
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x457f29
Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fb88cf42c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a
RAX: fffffffffffffe00 RBX: 0000000000000003 RCX: 0000000000457f29
RDX: 0000000000000012 RSI: 0000000020000080 RDI: 0000000000000004
RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb88cf436d4
R13: 00000000004be462 R14: 00000000004cec98 R15: 00000000ffffffff
Modules linked in:

Fixes: 95d6ebd53c79 ("net/x25: fix use-after-free in x25_device_event()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: andrew hendry <andrew.hendry@gmail.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 27171ac6fe3b..20a511398389 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -825,6 +825,7 @@ out_put_neigh:
 		x25_neigh_put(x25->neighbour);
 		x25->neighbour = NULL;
 		read_unlock_bh(&x25_list_lock);
+		x25->state = X25_STATE_0;
 	}
 out_put_route:
 	x25_route_put(rt);
-- 
cgit 


From 5b5f99b186906d198f4455b3add911c87ab361fc Mon Sep 17 00:00:00 2001
From: Zhike Wang <wangzhike@jd.com>
Date: Mon, 11 Mar 2019 03:15:54 -0700
Subject: net_sched: return correct value for *notify* functions

It is confusing to directly use return value of netlink_send()/
netlink_unicast() as the return value of *notify*, as it may be not
error at all.

Example: in tc_del_tfilter(), after calling tfilter_del_notify(), it will
goto errout if (err). However, the netlink_send()/netlink_unicast() will
return positive value even for successful case. So it may not call
tcf_chain_tp_remove() and so on to clean up the resource, as a result,
resource is leaked.

It may be easier to only check the return value of tfilter_del_nofiy(),
but it is more clean to correct all related functions.

Co-developed-by: Zengmo Gao <gaozengmo@jd.com>
Signed-off-by: Zhike Wang <wangzhike@jd.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 32 +++++++++++++++++++++++---------
 net/sched/sch_api.c | 15 +++++++++++----
 2 files changed, 34 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 2c2aac4ac721..dc10525e90e7 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1893,6 +1893,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 {
 	struct sk_buff *skb;
 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	int err = 0;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
@@ -1906,10 +1907,14 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 	}
 
 	if (unicast)
-		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+	else
+		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+				     n->nlmsg_flags & NLM_F_ECHO);
 
-	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
-			      n->nlmsg_flags & NLM_F_ECHO);
+	if (err > 0)
+		err = 0;
+	return err;
 }
 
 static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
@@ -1941,12 +1946,15 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
 	}
 
 	if (unicast)
-		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
-
-	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
-			     n->nlmsg_flags & NLM_F_ECHO);
+		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+	else
+		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+				     n->nlmsg_flags & NLM_F_ECHO);
 	if (err < 0)
 		NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
+
+	if (err > 0)
+		err = 0;
 	return err;
 }
 
@@ -2688,6 +2696,7 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
 	struct tcf_block *block = chain->block;
 	struct net *net = block->net;
 	struct sk_buff *skb;
+	int err = 0;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
@@ -2701,9 +2710,14 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
 	}
 
 	if (unicast)
-		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+	else
+		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+				     flags & NLM_F_ECHO);
 
-	return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
+	if (err > 0)
+		err = 0;
+	return err;
 }
 
 static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 352b46f98440..fb8f138b9776 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1824,6 +1824,7 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
 {
 	struct sk_buff *skb;
 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	int err = 0;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
@@ -1834,8 +1835,11 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
 		return -EINVAL;
 	}
 
-	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
-			      n->nlmsg_flags & NLM_F_ECHO);
+	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+			     n->nlmsg_flags & NLM_F_ECHO);
+	if (err > 0)
+		err = 0;
+	return err;
 }
 
 static int tclass_del_notify(struct net *net,
@@ -1866,8 +1870,11 @@ static int tclass_del_notify(struct net *net,
 		return err;
 	}
 
-	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
-			      n->nlmsg_flags & NLM_F_ECHO);
+	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+			     n->nlmsg_flags & NLM_F_ECHO);
+	if (err > 0)
+		err = 0;
+	return err;
 }
 
 #ifdef CONFIG_NET_CLS
-- 
cgit 


From 4504ab0e6eb801555368cbb3011ab0530f659d4b Mon Sep 17 00:00:00 2001
From: Vakul Garg <vakul.garg@nxp.com>
Date: Tue, 12 Mar 2019 08:22:57 +0000
Subject: net/tls: Inform user space about send buffer availability

A previous fix ("tls: Fix write space handling") assumed that
user space application gets informed about the socket send buffer
availability when tls_push_sg() gets called. Inside tls_push_sg(), in
case do_tcp_sendpages() returns 0, the function returns without calling
ctx->sk_write_space. Further, the new function tls_sw_write_space()
did not invoke ctx->sk_write_space. This leads to situation that user
space application encounters a lockup always waiting for socket send
buffer to become available.

Rather than call ctx->sk_write_space from tls_push_sg(), it should be
called from tls_write_space. So whenever tcp stack invokes
sk->sk_write_space after freeing socket send buffer, we always declare
the same to user space by the way of invoking ctx->sk_write_space.

Fixes: 7463d3a2db0ef ("tls: Fix write space handling")
Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
Reviewed-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_device.c | 3 ---
 net/tls/tls_main.c   | 3 ++-
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 4a1da837a733..135a7ee9db03 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -558,9 +558,6 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
 					     MSG_DONTWAIT | MSG_NOSIGNAL);
 		sk->sk_allocation = sk_allocation;
 	}
-
-	if (!rc)
-		ctx->sk_write_space(sk);
 }
 
 void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 17e8667917aa..df921a2904b9 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -146,7 +146,6 @@ retry:
 	}
 
 	ctx->in_tcp_sendpages = false;
-	ctx->sk_write_space(sk);
 
 	return 0;
 }
@@ -228,6 +227,8 @@ static void tls_write_space(struct sock *sk)
 	else
 #endif
 		tls_sw_write_space(sk, ctx);
+
+	ctx->sk_write_space(sk);
 }
 
 static void tls_ctx_free(struct tls_context *ctx)
-- 
cgit 


From 163d1c3d6f17556ed3c340d3789ea93be95d6c28 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 12 Mar 2019 06:50:11 -0700
Subject: l2tp: fix infoleak in l2tp_ip6_recvmsg()

Back in 2013 Hannes took care of most of such leaks in commit
bceaa90240b6 ("inet: prevent leakage of uninitialized memory to user in recv syscalls")

But the bug in l2tp_ip6_recvmsg() has not been fixed.

syzbot report :

BUG: KMSAN: kernel-infoleak in _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32
CPU: 1 PID: 10996 Comm: syz-executor362 Not tainted 5.0.0+ #11
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x173/0x1d0 lib/dump_stack.c:113
 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:600
 kmsan_internal_check_memory+0x9f4/0xb10 mm/kmsan/kmsan.c:694
 kmsan_copy_to_user+0xab/0xc0 mm/kmsan/kmsan_hooks.c:601
 _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32
 copy_to_user include/linux/uaccess.h:174 [inline]
 move_addr_to_user+0x311/0x570 net/socket.c:227
 ___sys_recvmsg+0xb65/0x1310 net/socket.c:2283
 do_recvmmsg+0x646/0x10c0 net/socket.c:2390
 __sys_recvmmsg net/socket.c:2469 [inline]
 __do_sys_recvmmsg net/socket.c:2492 [inline]
 __se_sys_recvmmsg+0x1d1/0x350 net/socket.c:2485
 __x64_sys_recvmmsg+0x62/0x80 net/socket.c:2485
 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291
 entry_SYSCALL_64_after_hwframe+0x63/0xe7
RIP: 0033:0x445819
Code: e8 6c b6 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b 12 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f64453eddb8 EFLAGS: 00000246 ORIG_RAX: 000000000000012b
RAX: ffffffffffffffda RBX: 00000000006dac28 RCX: 0000000000445819
RDX: 0000000000000005 RSI: 0000000020002f80 RDI: 0000000000000003
RBP: 00000000006dac20 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dac2c
R13: 00007ffeba8f87af R14: 00007f64453ee9c0 R15: 20c49ba5e353f7cf

Local variable description: ----addr@___sys_recvmsg
Variable was created at:
 ___sys_recvmsg+0xf6/0x1310 net/socket.c:2244
 do_recvmmsg+0x646/0x10c0 net/socket.c:2390

Bytes 0-31 of 32 are uninitialized
Memory access of size 32 starts at ffff8880ae62fbb0
Data copied to user address 0000000020000000

Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip6.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 0ae6899edac0..37a69df17cab 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -674,9 +674,6 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	if (flags & MSG_OOB)
 		goto out;
 
-	if (addr_len)
-		*addr_len = sizeof(*lsa);
-
 	if (flags & MSG_ERRQUEUE)
 		return ipv6_recv_error(sk, msg, len, addr_len);
 
@@ -706,6 +703,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		lsa->l2tp_conn_id = 0;
 		if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
 			lsa->l2tp_scope_id = inet6_iif(skb);
+		*addr_len = sizeof(*lsa);
 	}
 
 	if (np->rxopt.all)
-- 
cgit