From 46f7487e161b195a1bd7ddbd9c6aba9c93ec881a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Mar 2019 12:44:17 +0100 Subject: netfilter: nat: don't register device notifier twice Otherwise, we get notifier list corruption. This is the most simple fix: remove the device notifier call chain from the ipv6 masquerade register function and handle it only in the ipv4 version. The better fix is merge nf_nat_masquerade_ipv4/6_(un)register_notifier into a single nf_nat_masquerade_(un)register_notifiers but to do this its needed to first merge the two masquerade modules into a single xt_MASQUERADE. Furthermore, we need to use different refcounts for ipv4/ipv6 until we can merge MASQUERADE. Fixes: d1aca8ab3104a ("netfilter: nat: merge ipv4 and ipv6 masquerade functionality") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_masquerade.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index 86fa4dcc63c5..d85c4d902e7b 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -11,7 +11,8 @@ #include static DEFINE_MUTEX(masq_mutex); -static unsigned int masq_refcnt __read_mostly; +static unsigned int masq_refcnt4 __read_mostly; +static unsigned int masq_refcnt6 __read_mostly; unsigned int nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, @@ -141,8 +142,13 @@ int nf_nat_masquerade_ipv4_register_notifier(void) int ret = 0; mutex_lock(&masq_mutex); + if (WARN_ON_ONCE(masq_refcnt4 == UINT_MAX)) { + ret = -EOVERFLOW; + goto out_unlock; + } + /* check if the notifier was already set */ - if (++masq_refcnt > 1) + if (++masq_refcnt4 > 1) goto out_unlock; /* Register for device down reports */ @@ -160,7 +166,7 @@ int nf_nat_masquerade_ipv4_register_notifier(void) err_unregister: unregister_netdevice_notifier(&masq_dev_notifier); err_dec: - masq_refcnt--; + masq_refcnt4--; out_unlock: mutex_unlock(&masq_mutex); return ret; @@ -171,7 +177,7 @@ void nf_nat_masquerade_ipv4_unregister_notifier(void) { mutex_lock(&masq_mutex); /* check if the notifier still has clients */ - if (--masq_refcnt > 0) + if (--masq_refcnt4 > 0) goto out_unlock; unregister_netdevice_notifier(&masq_dev_notifier); @@ -321,25 +327,23 @@ int nf_nat_masquerade_ipv6_register_notifier(void) int ret = 0; mutex_lock(&masq_mutex); - /* check if the notifier is already set */ - if (++masq_refcnt > 1) + if (WARN_ON_ONCE(masq_refcnt6 == UINT_MAX)) { + ret = -EOVERFLOW; goto out_unlock; + } - ret = register_netdevice_notifier(&masq_dev_notifier); - if (ret) - goto err_dec; + /* check if the notifier is already set */ + if (++masq_refcnt6 > 1) + goto out_unlock; ret = register_inet6addr_notifier(&masq_inet6_notifier); if (ret) - goto err_unregister; + goto err_dec; mutex_unlock(&masq_mutex); return ret; - -err_unregister: - unregister_netdevice_notifier(&masq_dev_notifier); err_dec: - masq_refcnt--; + masq_refcnt6--; out_unlock: mutex_unlock(&masq_mutex); return ret; @@ -350,11 +354,10 @@ void nf_nat_masquerade_ipv6_unregister_notifier(void) { mutex_lock(&masq_mutex); /* check if the notifier still has clients */ - if (--masq_refcnt > 0) + if (--masq_refcnt6 > 0) goto out_unlock; unregister_inet6addr_notifier(&masq_inet6_notifier); - unregister_netdevice_notifier(&masq_dev_notifier); out_unlock: mutex_unlock(&masq_mutex); } -- cgit From 40ba1d9b4d19796afc9b7ece872f5f3e8f5e2c13 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Mar 2019 00:58:53 +0100 Subject: netfilter: nf_tables: fix set double-free in abort path The abort path can cause a double-free of an anonymous set. Added-and-to-be-aborted rule looks like this: udp dport { 137, 138 } drop The to-be-aborted transaction list looks like this: newset newsetelem newsetelem rule This gets walked in reverse order, so first pass disables the rule, the set elements, then the set. After synchronize_rcu(), we then destroy those in same order: rule, set element, set element, newset. Problem is that the anonymous set has already been bound to the rule, so the rule (lookup expression destructor) already frees the set, when then cause use-after-free when trying to delete the elements from this set, then try to free the set again when handling the newset expression. Rule releases the bound set in first place from the abort path, this causes the use-after-free on set element removal when undoing the new element transactions. To handle this, skip new element transaction if set is bound from the abort path. This is still causes the use-after-free on set element removal. To handle this, remove transaction from the list when the set is already bound. Joint work with Florian Westphal. Fixes: f6ac85858976 ("netfilter: nf_tables: unbind set in rule from commit path") Bugzilla: https://bugzilla.netfilter.org/show_bug.cgi?id=1325 Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index faf6bd10a19f..1333bf97dc26 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -142,7 +142,7 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { if (trans->msg_type == NFT_MSG_NEWSET && nft_trans_set(trans) == set) { - nft_trans_set_bound(trans) = true; + set->bound = true; break; } } @@ -6709,8 +6709,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - if (!nft_trans_set_bound(trans)) - nft_set_destroy(nft_trans_set(trans)); + nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), @@ -6783,8 +6782,11 @@ static int __nf_tables_abort(struct net *net) break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - if (!nft_trans_set_bound(trans)) - list_del_rcu(&nft_trans_set(trans)->list); + if (nft_trans_set(trans)->bound) { + nft_trans_destroy(trans); + break; + } + list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: trans->ctx.table->use++; @@ -6792,8 +6794,11 @@ static int __nf_tables_abort(struct net *net) nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: + if (nft_trans_elem_set(trans)->bound) { + nft_trans_destroy(trans); + break; + } te = (struct nft_trans_elem *)trans->data; - te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); break; -- cgit From 273fe3f1006ea5ebc63d6729e43e8e45e32b256a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Mar 2019 15:30:03 +0100 Subject: netfilter: nf_tables: bogus EBUSY when deleting set after flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set deletion after flush coming in the same batch results in EBUSY. Add set use counter to track the number of references to this set from rules. We cannot rely on the list of bindings for this since such list is still populated from the preparation phase. Reported-by: Václav Zindulka Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 28 +++++++++++++++++++++++++++- net/netfilter/nft_dynset.c | 13 +++++++++---- net/netfilter/nft_lookup.c | 13 +++++++++---- net/netfilter/nft_objref.c | 13 +++++++++---- 4 files changed, 54 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1333bf97dc26..ca09ef037ca5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3672,6 +3672,9 @@ err1: static void nft_set_destroy(struct nft_set *set) { + if (WARN_ON(set->use > 0)) + return; + set->ops->destroy(set); module_put(to_set_type(set->ops)->owner); kfree(set->name); @@ -3712,7 +3715,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(set); } - if (!list_empty(&set->bindings) || + if (set->use || (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) { NL_SET_BAD_ATTR(extack, attr); return -EBUSY; @@ -3742,6 +3745,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *i; struct nft_set_iter iter; + if (set->use == UINT_MAX) + return -EOVERFLOW; + if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) return -EBUSY; @@ -3769,6 +3775,7 @@ bind: binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); nft_set_trans_bind(ctx, set); + set->use++; return 0; } @@ -3788,6 +3795,25 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, } EXPORT_SYMBOL_GPL(nf_tables_unbind_set); +void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding, + enum nft_trans_phase phase) +{ + switch (phase) { + case NFT_TRANS_PREPARE: + set->use--; + return; + case NFT_TRANS_ABORT: + case NFT_TRANS_RELEASE: + set->use--; + /* fall through */ + default: + nf_tables_unbind_set(ctx, set, binding, + phase == NFT_TRANS_COMMIT); + } +} +EXPORT_SYMBOL_GPL(nf_tables_deactivate_set); + void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) { if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index a8a74a16f9c4..e461007558e8 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -240,11 +240,15 @@ static void nft_dynset_deactivate(const struct nft_ctx *ctx, { struct nft_dynset *priv = nft_expr_priv(expr); - if (phase == NFT_TRANS_PREPARE) - return; + nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); +} + +static void nft_dynset_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_dynset *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding, - phase == NFT_TRANS_COMMIT); + priv->set->use++; } static void nft_dynset_destroy(const struct nft_ctx *ctx, @@ -292,6 +296,7 @@ static const struct nft_expr_ops nft_dynset_ops = { .eval = nft_dynset_eval, .init = nft_dynset_init, .destroy = nft_dynset_destroy, + .activate = nft_dynset_activate, .deactivate = nft_dynset_deactivate, .dump = nft_dynset_dump, }; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 14496da5141d..161c3451a747 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -127,11 +127,15 @@ static void nft_lookup_deactivate(const struct nft_ctx *ctx, { struct nft_lookup *priv = nft_expr_priv(expr); - if (phase == NFT_TRANS_PREPARE) - return; + nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); +} + +static void nft_lookup_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_lookup *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding, - phase == NFT_TRANS_COMMIT); + priv->set->use++; } static void nft_lookup_destroy(const struct nft_ctx *ctx, @@ -222,6 +226,7 @@ static const struct nft_expr_ops nft_lookup_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), .eval = nft_lookup_eval, .init = nft_lookup_init, + .activate = nft_lookup_activate, .deactivate = nft_lookup_deactivate, .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 79ef074c18ca..457a9ceb46af 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -162,11 +162,15 @@ static void nft_objref_map_deactivate(const struct nft_ctx *ctx, { struct nft_objref_map *priv = nft_expr_priv(expr); - if (phase == NFT_TRANS_PREPARE) - return; + nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); +} + +static void nft_objref_map_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_objref_map *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding, - phase == NFT_TRANS_COMMIT); + priv->set->use++; } static void nft_objref_map_destroy(const struct nft_ctx *ctx, @@ -183,6 +187,7 @@ static const struct nft_expr_ops nft_objref_map_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)), .eval = nft_objref_map_eval, .init = nft_objref_map_init, + .activate = nft_objref_map_activate, .deactivate = nft_objref_map_deactivate, .destroy = nft_objref_map_destroy, .dump = nft_objref_map_dump, -- cgit From 3f3a390dbd59d236f62cff8e8b20355ef7069e3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 11 Mar 2019 13:04:16 +0100 Subject: netfilter: nf_tables: use-after-free in dynamic operations Smatch reports: net/netfilter/nf_tables_api.c:2167 nf_tables_expr_destroy() error: dereferencing freed memory 'expr->ops' net/netfilter/nf_tables_api.c 2162 static void nf_tables_expr_destroy(const struct nft_ctx *ctx, 2163 struct nft_expr *expr) 2164 { 2165 if (expr->ops->destroy) 2166 expr->ops->destroy(ctx, expr); ^^^^ --> 2167 module_put(expr->ops->type->owner); ^^^^^^^^^ 2168 } Smatch says there are three functions which free expr->ops. Fixes: b8e204006340 ("netfilter: nft_compat: use .release_ops and remove list of extension") Reported-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ca09ef037ca5..9d8f51dfc593 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2162,9 +2162,11 @@ err1: static void nf_tables_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr) { + const struct nft_expr_type *type = expr->ops->type; + if (expr->ops->destroy) expr->ops->destroy(ctx, expr); - module_put(expr->ops->type->owner); + module_put(type->owner); } struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, -- cgit From a843dc4ebaecd15fca1f4d35a97210f72ea1473b Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 11 Mar 2019 16:29:32 +0800 Subject: net: sit: fix UBSAN Undefined behaviour in check_6rd In func check_6rd,tunnel->ip6rd.relay_prefixlen may equal to 32,so UBSAN complain about it. UBSAN: Undefined behaviour in net/ipv6/sit.c:781:47 shift exponent 32 is too large for 32-bit type 'unsigned int' CPU: 6 PID: 20036 Comm: syz-executor.0 Not tainted 4.19.27 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xca/0x13e lib/dump_stack.c:113 ubsan_epilogue+0xe/0x81 lib/ubsan.c:159 __ubsan_handle_shift_out_of_bounds+0x293/0x2e8 lib/ubsan.c:425 check_6rd.constprop.9+0x433/0x4e0 net/ipv6/sit.c:781 try_6rd net/ipv6/sit.c:806 [inline] ipip6_tunnel_xmit net/ipv6/sit.c:866 [inline] sit_tunnel_xmit+0x141c/0x2720 net/ipv6/sit.c:1033 __netdev_start_xmit include/linux/netdevice.h:4300 [inline] netdev_start_xmit include/linux/netdevice.h:4309 [inline] xmit_one net/core/dev.c:3243 [inline] dev_hard_start_xmit+0x17c/0x780 net/core/dev.c:3259 __dev_queue_xmit+0x1656/0x2500 net/core/dev.c:3829 neigh_output include/net/neighbour.h:501 [inline] ip6_finish_output2+0xa36/0x2290 net/ipv6/ip6_output.c:120 ip6_finish_output+0x3e7/0xa20 net/ipv6/ip6_output.c:154 NF_HOOK_COND include/linux/netfilter.h:278 [inline] ip6_output+0x1e2/0x720 net/ipv6/ip6_output.c:171 dst_output include/net/dst.h:444 [inline] ip6_local_out+0x99/0x170 net/ipv6/output_core.c:176 ip6_send_skb+0x9d/0x2f0 net/ipv6/ip6_output.c:1697 ip6_push_pending_frames+0xc0/0x100 net/ipv6/ip6_output.c:1717 rawv6_push_pending_frames net/ipv6/raw.c:616 [inline] rawv6_sendmsg+0x2435/0x3530 net/ipv6/raw.c:946 inet_sendmsg+0xf8/0x5c0 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xc8/0x110 net/socket.c:631 ___sys_sendmsg+0x6cf/0x890 net/socket.c:2114 __sys_sendmsg+0xf0/0x1b0 net/socket.c:2152 do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Signed-off-by: linmiaohe Signed-off-by: David S. Miller --- net/ipv6/sit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 09e440e8dfae..07e21a82ce4c 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -778,8 +778,9 @@ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, pbw0 = tunnel->ip6rd.prefixlen >> 5; pbi0 = tunnel->ip6rd.prefixlen & 0x1f; - d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> - tunnel->ip6rd.relay_prefixlen; + d = tunnel->ip6rd.relay_prefixlen < 32 ? + (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> + tunnel->ip6rd.relay_prefixlen : 0; pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen; if (pbi1 > 0) -- cgit From b8b27498659c65034032af79842913844a6cc79a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 7 Mar 2019 23:20:11 +0100 Subject: netfilter: nf_tables: return immediately on empty commit When running 'nft flush ruleset' while no rules exist, we will increment the generation counter and announce a new genid to userspace, yet nothing had changed in the first place. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9d8f51dfc593..513f93118604 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6564,6 +6564,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) struct nft_chain *chain; struct nft_table *table; + if (list_empty(&net->nft.commit_list)) { + mutex_unlock(&net->nft.commit_mutex); + return 0; + } + /* 0. Validate ruleset, otherwise roll back for error reporting. */ if (nf_tables_validate(net) < 0) return -EAGAIN; -- cgit From f2feaefdabb0a6253aa020f65e7388f07a9ed47c Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 11 Mar 2019 11:41:05 -0700 Subject: tcp: Don't access TCP_SKB_CB before initializing it Since commit eeea10b83a13 ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()"), tcp_vX_fill_cb is only called after tcp_filter(). That means, TCP_SKB_CB(skb)->end_seq still points to the IP-part of the cb. We thus should not mock with it, as this can trigger bugs (thanks syzkaller): [ 12.349396] ================================================================== [ 12.350188] BUG: KASAN: slab-out-of-bounds in ip6_datagram_recv_specific_ctl+0x19b3/0x1a20 [ 12.351035] Read of size 1 at addr ffff88006adbc208 by task test_ip6_datagr/1799 Setting end_seq is actually no more necessary in tcp_filter as it gets initialized later on in tcp_vX_fill_cb. Cc: Eric Dumazet Fixes: eeea10b83a13 ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()") Signed-off-by: Christoph Paasch Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 831d844a27ca..277d71239d75 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1734,15 +1734,8 @@ EXPORT_SYMBOL(tcp_add_backlog); int tcp_filter(struct sock *sk, struct sk_buff *skb) { struct tcphdr *th = (struct tcphdr *)skb->data; - unsigned int eaten = skb->len; - int err; - err = sk_filter_trim_cap(sk, skb, th->doff * 4); - if (!err) { - eaten -= skb->len; - TCP_SKB_CB(skb)->end_seq -= eaten; - } - return err; + return sk_filter_trim_cap(sk, skb, th->doff * 4); } EXPORT_SYMBOL(tcp_filter); -- cgit From ee74d0bd4325efb41e38affe5955f920ed973f23 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Mar 2019 13:48:44 -0700 Subject: net/x25: reset state in x25_connect() In case x25_connect() fails and frees the socket neighbour, we also need to undo the change done to x25->state. Before my last bug fix, we had use-after-free so this patch fixes a latent bug. syzbot report : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 16137 Comm: syz-executor.1 Not tainted 5.0.0+ #117 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:x25_write_internal+0x1e8/0xdf0 net/x25/x25_subr.c:173 Code: 00 40 88 b5 e0 fe ff ff 0f 85 01 0b 00 00 48 8b 8b 80 04 00 00 48 ba 00 00 00 00 00 fc ff df 48 8d 79 1c 48 89 fe 48 c1 ee 03 <0f> b6 34 16 48 89 fa 83 e2 07 83 c2 03 40 38 f2 7c 09 40 84 f6 0f RSP: 0018:ffff888076717a08 EFLAGS: 00010207 RAX: ffff88805f2f2292 RBX: ffff8880a0ae6000 RCX: 0000000000000000 kobject: 'loop5' (0000000018d0d0ee): kobject_uevent_env RDX: dffffc0000000000 RSI: 0000000000000003 RDI: 000000000000001c RBP: ffff888076717b40 R08: ffff8880950e0580 R09: ffffed100be5e46d R10: ffffed100be5e46c R11: ffff88805f2f2363 R12: ffff888065579840 kobject: 'loop5' (0000000018d0d0ee): fill_kobj_path: path = '/devices/virtual/block/loop5' R13: 1ffff1100ece2f47 R14: 0000000000000013 R15: 0000000000000013 FS: 00007fb88cf43700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f9a42a41028 CR3: 0000000087a67000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: x25_release+0xd0/0x340 net/x25/af_x25.c:658 __sock_release+0xd3/0x2b0 net/socket.c:579 sock_close+0x1b/0x30 net/socket.c:1162 __fput+0x2df/0x8d0 fs/file_table.c:278 ____fput+0x16/0x20 fs/file_table.c:309 task_work_run+0x14a/0x1c0 kernel/task_work.c:113 get_signal+0x1961/0x1d50 kernel/signal.c:2388 do_signal+0x87/0x1940 arch/x86/kernel/signal.c:816 exit_to_usermode_loop+0x244/0x2c0 arch/x86/entry/common.c:162 prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline] syscall_return_slowpath arch/x86/entry/common.c:268 [inline] do_syscall_64+0x52d/0x610 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457f29 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fb88cf42c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: fffffffffffffe00 RBX: 0000000000000003 RCX: 0000000000457f29 RDX: 0000000000000012 RSI: 0000000020000080 RDI: 0000000000000004 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb88cf436d4 R13: 00000000004be462 R14: 00000000004cec98 R15: 00000000ffffffff Modules linked in: Fixes: 95d6ebd53c79 ("net/x25: fix use-after-free in x25_device_event()") Signed-off-by: Eric Dumazet Cc: andrew hendry Reported-by: syzbot Signed-off-by: David S. Miller --- net/x25/af_x25.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 27171ac6fe3b..20a511398389 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -825,6 +825,7 @@ out_put_neigh: x25_neigh_put(x25->neighbour); x25->neighbour = NULL; read_unlock_bh(&x25_list_lock); + x25->state = X25_STATE_0; } out_put_route: x25_route_put(rt); -- cgit From 5b5f99b186906d198f4455b3add911c87ab361fc Mon Sep 17 00:00:00 2001 From: Zhike Wang Date: Mon, 11 Mar 2019 03:15:54 -0700 Subject: net_sched: return correct value for *notify* functions It is confusing to directly use return value of netlink_send()/ netlink_unicast() as the return value of *notify*, as it may be not error at all. Example: in tc_del_tfilter(), after calling tfilter_del_notify(), it will goto errout if (err). However, the netlink_send()/netlink_unicast() will return positive value even for successful case. So it may not call tcf_chain_tp_remove() and so on to clean up the resource, as a result, resource is leaked. It may be easier to only check the return value of tfilter_del_nofiy(), but it is more clean to correct all related functions. Co-developed-by: Zengmo Gao Signed-off-by: Zhike Wang Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_api.c | 32 +++++++++++++++++++++++--------- net/sched/sch_api.c | 15 +++++++++++---- 2 files changed, 34 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2c2aac4ac721..dc10525e90e7 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1893,6 +1893,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + int err = 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) @@ -1906,10 +1907,14 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, } if (unicast) - return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + else + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + if (err > 0) + err = 0; + return err; } static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, @@ -1941,12 +1946,15 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, } if (unicast) - return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); - - err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + else + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send filter delete notification"); + + if (err > 0) + err = 0; return err; } @@ -2688,6 +2696,7 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, struct tcf_block *block = chain->block; struct net *net = block->net; struct sk_buff *skb; + int err = 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) @@ -2701,9 +2710,14 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, } if (unicast) - return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + else + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + flags & NLM_F_ECHO); - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); + if (err > 0) + err = 0; + return err; } static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 352b46f98440..fb8f138b9776 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1824,6 +1824,7 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + int err = 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) @@ -1834,8 +1835,11 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, return -EINVAL; } - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); + if (err > 0) + err = 0; + return err; } static int tclass_del_notify(struct net *net, @@ -1866,8 +1870,11 @@ static int tclass_del_notify(struct net *net, return err; } - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); + if (err > 0) + err = 0; + return err; } #ifdef CONFIG_NET_CLS -- cgit From 4504ab0e6eb801555368cbb3011ab0530f659d4b Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Tue, 12 Mar 2019 08:22:57 +0000 Subject: net/tls: Inform user space about send buffer availability A previous fix ("tls: Fix write space handling") assumed that user space application gets informed about the socket send buffer availability when tls_push_sg() gets called. Inside tls_push_sg(), in case do_tcp_sendpages() returns 0, the function returns without calling ctx->sk_write_space. Further, the new function tls_sw_write_space() did not invoke ctx->sk_write_space. This leads to situation that user space application encounters a lockup always waiting for socket send buffer to become available. Rather than call ctx->sk_write_space from tls_push_sg(), it should be called from tls_write_space. So whenever tcp stack invokes sk->sk_write_space after freeing socket send buffer, we always declare the same to user space by the way of invoking ctx->sk_write_space. Fixes: 7463d3a2db0ef ("tls: Fix write space handling") Signed-off-by: Vakul Garg Reviewed-by: Boris Pismenny Signed-off-by: David S. Miller --- net/tls/tls_device.c | 3 --- net/tls/tls_main.c | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 4a1da837a733..135a7ee9db03 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -558,9 +558,6 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx) MSG_DONTWAIT | MSG_NOSIGNAL); sk->sk_allocation = sk_allocation; } - - if (!rc) - ctx->sk_write_space(sk); } void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 17e8667917aa..df921a2904b9 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -146,7 +146,6 @@ retry: } ctx->in_tcp_sendpages = false; - ctx->sk_write_space(sk); return 0; } @@ -228,6 +227,8 @@ static void tls_write_space(struct sock *sk) else #endif tls_sw_write_space(sk, ctx); + + ctx->sk_write_space(sk); } static void tls_ctx_free(struct tls_context *ctx) -- cgit From 163d1c3d6f17556ed3c340d3789ea93be95d6c28 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Mar 2019 06:50:11 -0700 Subject: l2tp: fix infoleak in l2tp_ip6_recvmsg() Back in 2013 Hannes took care of most of such leaks in commit bceaa90240b6 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") But the bug in l2tp_ip6_recvmsg() has not been fixed. syzbot report : BUG: KMSAN: kernel-infoleak in _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 CPU: 1 PID: 10996 Comm: syz-executor362 Not tainted 5.0.0+ #11 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:600 kmsan_internal_check_memory+0x9f4/0xb10 mm/kmsan/kmsan.c:694 kmsan_copy_to_user+0xab/0xc0 mm/kmsan/kmsan_hooks.c:601 _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 copy_to_user include/linux/uaccess.h:174 [inline] move_addr_to_user+0x311/0x570 net/socket.c:227 ___sys_recvmsg+0xb65/0x1310 net/socket.c:2283 do_recvmmsg+0x646/0x10c0 net/socket.c:2390 __sys_recvmmsg net/socket.c:2469 [inline] __do_sys_recvmmsg net/socket.c:2492 [inline] __se_sys_recvmmsg+0x1d1/0x350 net/socket.c:2485 __x64_sys_recvmmsg+0x62/0x80 net/socket.c:2485 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x445819 Code: e8 6c b6 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b 12 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f64453eddb8 EFLAGS: 00000246 ORIG_RAX: 000000000000012b RAX: ffffffffffffffda RBX: 00000000006dac28 RCX: 0000000000445819 RDX: 0000000000000005 RSI: 0000000020002f80 RDI: 0000000000000003 RBP: 00000000006dac20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dac2c R13: 00007ffeba8f87af R14: 00007f64453ee9c0 R15: 20c49ba5e353f7cf Local variable description: ----addr@___sys_recvmsg Variable was created at: ___sys_recvmsg+0xf6/0x1310 net/socket.c:2244 do_recvmmsg+0x646/0x10c0 net/socket.c:2390 Bytes 0-31 of 32 are uninitialized Memory access of size 32 starts at ffff8880ae62fbb0 Data copied to user address 0000000020000000 Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip6.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 0ae6899edac0..37a69df17cab 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -674,9 +674,6 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*lsa); - if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); @@ -706,6 +703,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, lsa->l2tp_conn_id = 0; if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) lsa->l2tp_scope_id = inet6_iif(skb); + *addr_len = sizeof(*lsa); } if (np->rxopt.all) -- cgit