From e010ae08c71fda8be3d6bda256837795a0b3ea41 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 8 Feb 2023 18:13:59 +0100 Subject: ipv6: Fix datagram socket connection with DSCP. Take into account the IPV6_TCLASS socket option (DSCP) in ip6_datagram_flow_key_init(). Otherwise fib6_rule_match() can't properly match the DSCP value, resulting in invalid route lookup. For example: ip route add unreachable table main 2001:db8::10/124 ip route add table 100 2001:db8::10/124 dev eth0 ip -6 rule add dsfield 0x04 table 100 echo test | socat - UDP6:[2001:db8::11]:54321,ipv6-tclass=0x04 Without this patch, socat fails at connect() time ("No route to host") because the fib-rule doesn't jump to table 100 and the lookup ends up being done in the main table. Fixes: 2cc67cc731d9 ("[IPV6] ROUTE: Routing by Traffic Class.") Signed-off-by: Guillaume Nault Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv6/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e624497fa992..9b6818453afe 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -51,7 +51,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) fl6->flowi6_mark = sk->sk_mark; fl6->fl6_dport = inet->inet_dport; fl6->fl6_sport = inet->inet_sport; - fl6->flowlabel = np->flow_label; + fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); fl6->flowi6_uid = sk->sk_uid; if (!oif) -- cgit From 8230680f36fd1525303d1117768c8852314c488c Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 8 Feb 2023 18:14:03 +0100 Subject: ipv6: Fix tcp socket connection with DSCP. Take into account the IPV6_TCLASS socket option (DSCP) in tcp_v6_connect(). Otherwise fib6_rule_match() can't properly match the DSCP value, resulting in invalid route lookup. For example: ip route add unreachable table main 2001:db8::10/124 ip route add table 100 2001:db8::10/124 dev eth0 ip -6 rule add dsfield 0x04 table 100 echo test | socat - TCP6:[2001:db8::11]:54321,ipv6-tclass=0x04 Without this patch, socat fails at connect() time ("No route to host") because the fib-rule doesn't jump to table 100 and the lookup ends up being done in the main table. Fixes: 2cc67cc731d9 ("[IPV6] ROUTE: Routing by Traffic Class.") Signed-off-by: Guillaume Nault Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv6/tcp_ipv6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 11b736a76bd7..0d25e813288d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -272,6 +272,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; + fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; -- cgit From 6e77a5a4af05d5e7391c841a4a4f3e4cadf72c25 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Feb 2023 18:21:23 +0000 Subject: net: initialize net->notrefcnt_tracker earlier syzbot was able to trigger a warning [1] from net_free() calling ref_tracker_dir_exit(&net->notrefcnt_tracker) while the corresponding ref_tracker_dir_init() has not been done yet. copy_net_ns() can indeed bypass the call to setup_net() in some error conditions. Note: We might factorize/move more code in preinit_net() in the future. [1] INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. CPU: 0 PID: 5817 Comm: syz-executor.3 Not tainted 6.2.0-rc7-next-20230208-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/12/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x150 lib/dump_stack.c:106 assign_lock_key kernel/locking/lockdep.c:982 [inline] register_lock_class+0xdb6/0x1120 kernel/locking/lockdep.c:1295 __lock_acquire+0x10a/0x5df0 kernel/locking/lockdep.c:4951 lock_acquire.part.0+0x11c/0x370 kernel/locking/lockdep.c:5691 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x3d/0x60 kernel/locking/spinlock.c:162 ref_tracker_dir_exit+0x52/0x600 lib/ref_tracker.c:24 net_free net/core/net_namespace.c:442 [inline] net_free+0x98/0xd0 net/core/net_namespace.c:436 copy_net_ns+0x4f3/0x6b0 net/core/net_namespace.c:493 create_new_namespaces+0x3f6/0xb20 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xc1/0x1f0 kernel/nsproxy.c:228 ksys_unshare+0x449/0x920 kernel/fork.c:3205 __do_sys_unshare kernel/fork.c:3276 [inline] __se_sys_unshare kernel/fork.c:3274 [inline] __x64_sys_unshare+0x31/0x40 kernel/fork.c:3274 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 Fixes: 0cafd77dcd03 ("net: add a refcount tracker for kernel sockets") Reported-by: syzbot Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230208182123.3821604-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/net_namespace.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 078a0a420c8a..7b69cf882b8e 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -304,6 +304,12 @@ struct net *get_net_ns_by_id(const struct net *net, int id) } EXPORT_SYMBOL_GPL(get_net_ns_by_id); +/* init code that must occur even if setup_net() is not called. */ +static __net_init void preinit_net(struct net *net) +{ + ref_tracker_dir_init(&net->notrefcnt_tracker, 128); +} + /* * setup_net runs the initializers for the network namespace object. */ @@ -316,7 +322,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) refcount_set(&net->ns.count, 1); ref_tracker_dir_init(&net->refcnt_tracker, 128); - ref_tracker_dir_init(&net->notrefcnt_tracker, 128); refcount_set(&net->passive, 1); get_random_bytes(&net->hash_mix, sizeof(u32)); @@ -472,6 +477,8 @@ struct net *copy_net_ns(unsigned long flags, rv = -ENOMEM; goto dec_ucounts; } + + preinit_net(net); refcount_set(&net->passive, 1); net->ucounts = ucounts; get_user_ns(user_ns); @@ -1118,6 +1125,7 @@ void __init net_ns_init(void) init_net.key_domain = &init_net_key_domain; #endif down_write(&pernet_ops_rwsem); + preinit_net(&init_net); if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); -- cgit From a1221703a0f75a9d81748c516457e0fc76951496 Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Thu, 9 Feb 2023 12:13:05 +0000 Subject: sctp: sctp_sock_filter(): avoid list_entry() on possibly empty list Use list_is_first() to check whether tsp->asoc matches the first element of ep->asocs, as the list is not guaranteed to have an entry. Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Pietro Borrello Acked-by: Xin Long Link: https://lore.kernel.org/r/20230208-sctp-filter-v2-1-6e1f4017f326@diag.uniroma1.it Signed-off-by: Jakub Kicinski --- net/sctp/diag.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/sctp/diag.c b/net/sctp/diag.c index a557009e9832..c3d6b92dd386 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -343,11 +343,9 @@ static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; const struct inet_diag_req_v2 *r = commp->r; - struct sctp_association *assoc = - list_entry(ep->asocs.next, struct sctp_association, asocs); /* find the ep only once through the transports by this condition */ - if (tsp->asoc != assoc) + if (!list_is_first(&tsp->asoc->asocs, &ep->asocs)) return 0; if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) -- cgit From ee059170b1f7e94e55fa6cadee544e176a6e59c2 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Thu, 9 Feb 2023 11:37:39 -0300 Subject: net/sched: tcindex: update imperfect hash filters respecting rcu The imperfect hash area can be updated while packets are traversing, which will cause a use-after-free when 'tcf_exts_exec()' is called with the destroyed tcf_ext. CPU 0: CPU 1: tcindex_set_parms tcindex_classify tcindex_lookup tcindex_lookup tcf_exts_change tcf_exts_exec [UAF] Stop operating on the shared area directly, by using a local copy, and update the filter with 'rcu_replace_pointer()'. Delete the old filter version only after a rcu grace period elapsed. Fixes: 9b0d4446b569 ("net: sched: avoid atomic swap in tcf_exts_change") Reported-by: valis Suggested-by: valis Signed-off-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Link: https://lore.kernel.org/r/20230209143739.279867-1-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/cls_tcindex.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index ee2a050c887b..ba7f22a49397 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -339,6 +340,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcf_result cr = {}; int err, balloc = 0; struct tcf_exts e; + bool update_h = false; err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) @@ -456,10 +458,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } } - if (cp->perfect) + if (cp->perfect) { r = cp->perfect + handle; - else - r = tcindex_lookup(cp, handle) ? : &new_filter_result; + } else { + /* imperfect area is updated in-place using rcu */ + update_h = !!tcindex_lookup(cp, handle); + r = &new_filter_result; + } if (r == &new_filter_result) { f = kzalloc(sizeof(*f), GFP_KERNEL); @@ -485,7 +490,28 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, rcu_assign_pointer(tp->root, cp); - if (r == &new_filter_result) { + if (update_h) { + struct tcindex_filter __rcu **fp; + struct tcindex_filter *cf; + + f->result.res = r->res; + tcf_exts_change(&f->result.exts, &r->exts); + + /* imperfect area bucket */ + fp = cp->h + (handle % cp->hash); + + /* lookup the filter, guaranteed to exist */ + for (cf = rcu_dereference_bh_rtnl(*fp); cf; + fp = &cf->next, cf = rcu_dereference_bh_rtnl(*fp)) + if (cf->key == handle) + break; + + f->next = cf->next; + + cf = rcu_replace_pointer(*fp, f, 1); + tcf_exts_get_net(&cf->result.exts); + tcf_queue_work(&cf->rwork, tcindex_destroy_fexts_work); + } else if (r == &new_filter_result) { struct tcindex_filter *nfp; struct tcindex_filter __rcu **fp; -- cgit From ca43ccf41224b023fc290073d5603a755fd12eed Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 9 Feb 2023 16:22:01 -0800 Subject: dccp/tcp: Avoid negative sk_forward_alloc by ipv6_pinfo.pktoptions. Eric Dumazet pointed out [0] that when we call skb_set_owner_r() for ipv6_pinfo.pktoptions, sk_rmem_schedule() has not been called, resulting in a negative sk_forward_alloc. We add a new helper which clones a skb and sets its owner only when sk_rmem_schedule() succeeds. Note that we move skb_set_owner_r() forward in (dccp|tcp)_v6_do_rcv() because tcp_send_synack() can make sk_forward_alloc negative before ipv6_opt_accepted() in the crossed SYN-ACK or self-connect() cases. [0]: https://lore.kernel.org/netdev/CANn89iK9oc20Jdi_41jb9URdF210r7d1Y-+uypbMSbOfY6jqrg@mail.gmail.com/ Fixes: 323fbd0edf3f ("net: dccp: Add handling of IPV6_PKTOPTIONS to dccp_v6_do_rcv()") Fixes: 3df80d9320bc ("[DCCP]: Introduce DCCPv6") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/dccp/ipv6.c | 7 ++----- net/ipv6/tcp_ipv6.c | 10 +++------- 2 files changed, 5 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4260fe466993..b9d7c3dd1cb3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -551,11 +551,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); /* Clone pktoptions received with SYN, if we own the req */ if (*own_req && ireq->pktopts) { - newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); + newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); consume_skb(ireq->pktopts); ireq->pktopts = NULL; - if (newnp->pktoptions) - skb_set_owner_r(newnp->pktoptions, newsk); } return newsk; @@ -615,7 +613,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, GFP_ATOMIC); + opt_skb = skb_clone_and_charge_r(skb, sk); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) @@ -679,7 +677,6 @@ ipv6_pktoptions: np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &DCCP_SKB_CB(opt_skb)->header.h6)) { - skb_set_owner_r(opt_skb, sk); memmove(IP6CB(opt_skb), &DCCP_SKB_CB(opt_skb)->header.h6, sizeof(struct inet6_skb_parm)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0d25e813288d..a52a4f12f146 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1388,14 +1388,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Clone pktoptions received with SYN, if we own the req */ if (ireq->pktopts) { - newnp->pktoptions = skb_clone(ireq->pktopts, - sk_gfp_mask(sk, GFP_ATOMIC)); + newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); consume_skb(ireq->pktopts); ireq->pktopts = NULL; - if (newnp->pktoptions) { + if (newnp->pktoptions) tcp_v6_restore_cb(newnp->pktoptions); - skb_set_owner_r(newnp->pktoptions, newsk); - } } } else { if (!req_unhash && found_dup_sk) { @@ -1467,7 +1464,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); + opt_skb = skb_clone_and_charge_r(skb, sk); reason = SKB_DROP_REASON_NOT_SPECIFIED; if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ @@ -1553,7 +1550,6 @@ ipv6_pktoptions: if (np->repflow) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { - skb_set_owner_r(opt_skb, sk); tcp_v6_restore_cb(opt_skb); opt_skb = xchg(&np->pktoptions, opt_skb); } else { -- cgit From 62ec33b44e0f7168ff2886520fec6fb62d03b5a3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 9 Feb 2023 16:22:02 -0800 Subject: net: Remove WARN_ON_ONCE(sk->sk_forward_alloc) from sk_stream_kill_queues(). Christoph Paasch reported that commit b5fc29233d28 ("inet6: Remove inet6_destroy_sock() in sk->sk_prot->destroy().") started triggering WARN_ON_ONCE(sk->sk_forward_alloc) in sk_stream_kill_queues(). [0 - 2] Also, we can reproduce it by a program in [3]. In the commit, we delay freeing ipv6_pinfo.pktoptions from sk->destroy() to sk->sk_destruct(), so sk->sk_forward_alloc is no longer zero in inet_csk_destroy_sock(). The same check has been in inet_sock_destruct() from at least v2.6, we can just remove the WARN_ON_ONCE(). However, among the users of sk_stream_kill_queues(), only CAIF is not calling inet_sock_destruct(). Thus, we add the same WARN_ON_ONCE() to caif_sock_destructor(). [0]: https://lore.kernel.org/netdev/39725AB4-88F1-41B3-B07F-949C5CAEFF4F@icloud.com/ [1]: https://github.com/multipath-tcp/mptcp_net-next/issues/341 [2]: WARNING: CPU: 0 PID: 3232 at net/core/stream.c:212 sk_stream_kill_queues+0x2f9/0x3e0 Modules linked in: CPU: 0 PID: 3232 Comm: syz-executor.0 Not tainted 6.2.0-rc5ab24eb4698afbe147b424149c529e2a43ec24eb5 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:sk_stream_kill_queues+0x2f9/0x3e0 Code: 03 0f b6 04 02 84 c0 74 08 3c 03 0f 8e ec 00 00 00 8b ab 08 01 00 00 e9 60 ff ff ff e8 d0 5f b6 fe 0f 0b eb 97 e8 c7 5f b6 fe <0f> 0b eb a0 e8 be 5f b6 fe 0f 0b e9 6a fe ff ff e8 02 07 e3 fe e9 RSP: 0018:ffff88810570fc68 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff888101f38f40 RSI: ffffffff8285e529 RDI: 0000000000000005 RBP: 0000000000000ce0 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000ce0 R11: 0000000000000001 R12: ffff8881009e9488 R13: ffffffff84af2cc0 R14: 0000000000000000 R15: ffff8881009e9458 FS: 00007f7fdfbd5800(0000) GS:ffff88811b600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32923000 CR3: 00000001062fc006 CR4: 0000000000170ef0 Call Trace: inet_csk_destroy_sock+0x1a1/0x320 __tcp_close+0xab6/0xe90 tcp_close+0x30/0xc0 inet_release+0xe9/0x1f0 inet6_release+0x4c/0x70 __sock_release+0xd2/0x280 sock_close+0x15/0x20 __fput+0x252/0xa20 task_work_run+0x169/0x250 exit_to_user_mode_prepare+0x113/0x120 syscall_exit_to_user_mode+0x1d/0x40 do_syscall_64+0x48/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7f7fdf7ae28d Code: c1 20 00 00 75 10 b8 03 00 00 00 0f 05 48 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 ee fb ff ff 48 89 04 24 b8 03 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 37 fc ff ff 48 89 d0 48 83 c4 08 48 3d 01 RSP: 002b:00000000007dfbb0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000004 RCX: 00007f7fdf7ae28d RDX: 0000000000000000 RSI: ffffffffffffffff RDI: 0000000000000003 RBP: 0000000000000000 R08: 000000007f338e0f R09: 0000000000000e0f R10: 000000007f338e13 R11: 0000000000000293 R12: 00007f7fdefff000 R13: 00007f7fdefffcd8 R14: 00007f7fdefffce0 R15: 00007f7fdefffcd8 [3]: https://lore.kernel.org/netdev/20230208004245.83497-1-kuniyu@amazon.com/ Fixes: b5fc29233d28 ("inet6: Remove inet6_destroy_sock() in sk->sk_prot->destroy().") Reported-by: syzbot Reported-by: Christoph Paasch Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/caif/caif_socket.c | 1 + net/core/stream.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 748be7253248..78c9729a6057 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1015,6 +1015,7 @@ static void caif_sock_destructor(struct sock *sk) return; } sk_stream_kill_queues(&cf_sk->sk); + WARN_ON_ONCE(sk->sk_forward_alloc); caif_free_client(&cf_sk->layer); } diff --git a/net/core/stream.c b/net/core/stream.c index cd06750dd329..434446ab14c5 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -209,7 +209,6 @@ void sk_stream_kill_queues(struct sock *sk) sk_mem_reclaim_final(sk); WARN_ON_ONCE(sk->sk_wmem_queued); - WARN_ON_ONCE(sk->sk_forward_alloc); /* It is _impossible_ for the backlog to contain anything * when we get here. All user references to this socket -- cgit From 2f4796518315ab246638db8feebfcb494212e7ee Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Thu, 9 Feb 2023 01:16:48 -0800 Subject: af_key: Fix heap information leak Since x->encap of pfkey_msg2xfrm_state() is not initialized to 0, kernel heap data can be leaked. Fix with kzalloc() to prevent this. Signed-off-by: Hyunwoo Kim Acked-by: Herbert Xu Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/key/af_key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 2bdbcec781cd..a815f5ab4c49 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1261,7 +1261,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, const struct sadb_x_nat_t_type* n_type; struct xfrm_encap_tmpl *natt; - x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL); + x->encap = kzalloc(sizeof(*x->encap), GFP_KERNEL); if (!x->encap) { err = -ENOMEM; goto out; -- cgit From 2fa28f5c6fcbfc794340684f36d2581b4f2d20b5 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Fri, 10 Feb 2023 10:05:51 +0800 Subject: net: openvswitch: fix possible memory leak in ovs_meter_cmd_set() old_meter needs to be free after it is detached regardless of whether the new meter is successfully attached. Fixes: c7c4c44c9a95 ("net: openvswitch: expand the meters supported number") Signed-off-by: Hangyu Hua Acked-by: Eelco Chaudron Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/openvswitch/meter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 6e38f68f88c2..f2698d2316df 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -449,7 +449,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) err = attach_meter(meter_tbl, meter); if (err) - goto exit_unlock; + goto exit_free_old_meter; ovs_unlock(); @@ -472,6 +472,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) genlmsg_end(reply, ovs_reply_header); return genlmsg_reply(reply, info); +exit_free_old_meter: + ovs_meter_free(old_meter); exit_unlock: ovs_unlock(); nlmsg_free(reply); -- cgit From 9b55d3f0a69af649c62cbc2633e6d695bb3cc583 Mon Sep 17 00:00:00 2001 From: Felix Riemann Date: Fri, 10 Feb 2023 13:36:44 +0100 Subject: net: Fix unwanted sign extension in netdev_stats_to_stats64() When converting net_device_stats to rtnl_link_stats64 sign extension is triggered on ILP32 machines as 6c1c509778 changed the previous "ulong -> u64" conversion to "long -> u64" by accessing the net_device_stats fields through a (signed) atomic_long_t. This causes for example the received bytes counter to jump to 16EiB after having received 2^31 bytes. Casting the atomic value to "unsigned long" beforehand converting it into u64 avoids this. Fixes: 6c1c5097781f ("net: add atomic_long_t to net_device_stats fields") Signed-off-by: Felix Riemann Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b76fb37b381e..ea0a7bac1e5c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10375,7 +10375,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64)); for (i = 0; i < n; i++) - dst[i] = atomic_long_read(&src[i]); + dst[i] = (unsigned long)atomic_long_read(&src[i]); /* zero out counters that only exist in rtnl_link_stats64 */ memset((char *)stats64 + n * sizeof(u64), 0, sizeof(*stats64) - n * sizeof(u64)); -- cgit From 21c167aa0ba943a7cac2f6969814f83bb701666b Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 10 Feb 2023 17:08:25 -0300 Subject: net/sched: act_ctinfo: use percpu stats The tc action act_ctinfo was using shared stats, fix it to use percpu stats since bstats_update() must be called with locks or with a percpu pointer argument. tdc results: 1..12 ok 1 c826 - Add ctinfo action with default setting ok 2 0286 - Add ctinfo action with dscp ok 3 4938 - Add ctinfo action with valid cpmark and zone ok 4 7593 - Add ctinfo action with drop control ok 5 2961 - Replace ctinfo action zone and action control ok 6 e567 - Delete ctinfo action with valid index ok 7 6a91 - Delete ctinfo action with invalid index ok 8 5232 - List ctinfo actions ok 9 7702 - Flush ctinfo actions ok 10 3201 - Add ctinfo action with duplicate index ok 11 8295 - Add ctinfo action with invalid index ok 12 3964 - Replace ctinfo action with invalid goto_chain control Fixes: 24ec483cec98 ("net: sched: Introduce act_ctinfo action") Reviewed-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Reviewed-by: Larysa Zaremba Link: https://lore.kernel.org/r/20230210200824.444856-1-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/act_ctinfo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 4b1b59da5c0b..4d15b6a6169c 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -93,7 +93,7 @@ TC_INDIRECT_SCOPE int tcf_ctinfo_act(struct sk_buff *skb, cp = rcu_dereference_bh(ca->params); tcf_lastuse_update(&ca->tcf_tm); - bstats_update(&ca->tcf_bstats, skb); + tcf_action_update_bstats(&ca->common, skb); action = READ_ONCE(ca->tcf_action); wlen = skb_network_offset(skb); @@ -212,8 +212,8 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, index = actparm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { - ret = tcf_idr_create(tn, index, est, a, - &act_ctinfo_ops, bind, false, flags); + ret = tcf_idr_create_from_flags(tn, index, est, a, + &act_ctinfo_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; -- cgit From 2558b8039d059342197610498c8749ad294adee5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Feb 2023 16:00:59 +0000 Subject: net: use a bounce buffer for copying skb->mark syzbot found arm64 builds would crash in sock_recv_mark() when CONFIG_HARDENED_USERCOPY=y x86 and powerpc are not detecting the issue because they define user_access_begin. This will be handled in a different patch, because a check_object_size() is missing. Only data from skb->cb[] can be copied directly to/from user space, as explained in commit 79a8a642bf05 ("net: Whitelist the skbuff_head_cache "cb" field") syzbot report was: usercopy: Kernel memory exposure attempt detected from SLUB object 'skbuff_head_cache' (offset 168, size 4)! ------------[ cut here ]------------ kernel BUG at mm/usercopy.c:102 ! Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 4410 Comm: syz-executor533 Not tainted 6.2.0-rc7-syzkaller-17907-g2d3827b3f393 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : usercopy_abort+0x90/0x94 mm/usercopy.c:90 lr : usercopy_abort+0x90/0x94 mm/usercopy.c:90 sp : ffff80000fb9b9a0 x29: ffff80000fb9b9b0 x28: ffff0000c6073400 x27: 0000000020001a00 x26: 0000000000000014 x25: ffff80000cf52000 x24: fffffc0000000000 x23: 05ffc00000000200 x22: fffffc000324bf80 x21: ffff0000c92fe1a8 x20: 0000000000000001 x19: 0000000000000004 x18: 0000000000000000 x17: 656a626f2042554c x16: ffff0000c6073dd0 x15: ffff80000dbd2118 x14: ffff0000c6073400 x13: 00000000ffffffff x12: ffff0000c6073400 x11: ff808000081bbb4c x10: 0000000000000000 x9 : 7b0572d7cc0ccf00 x8 : 7b0572d7cc0ccf00 x7 : ffff80000bf650d4 x6 : 0000000000000000 x5 : 0000000000000001 x4 : 0000000000000001 x3 : 0000000000000000 x2 : ffff0001fefbff08 x1 : 0000000100000000 x0 : 000000000000006c Call trace: usercopy_abort+0x90/0x94 mm/usercopy.c:90 __check_heap_object+0xa8/0x100 mm/slub.c:4761 check_heap_object mm/usercopy.c:196 [inline] __check_object_size+0x208/0x6b8 mm/usercopy.c:251 check_object_size include/linux/thread_info.h:199 [inline] __copy_to_user include/linux/uaccess.h:115 [inline] put_cmsg+0x408/0x464 net/core/scm.c:238 sock_recv_mark net/socket.c:975 [inline] __sock_recv_cmsgs+0x1fc/0x248 net/socket.c:984 sock_recv_cmsgs include/net/sock.h:2728 [inline] packet_recvmsg+0x2d8/0x678 net/packet/af_packet.c:3482 ____sys_recvmsg+0x110/0x3a0 ___sys_recvmsg net/socket.c:2737 [inline] __sys_recvmsg+0x194/0x210 net/socket.c:2767 __do_sys_recvmsg net/socket.c:2777 [inline] __se_sys_recvmsg net/socket.c:2774 [inline] __arm64_sys_recvmsg+0x2c/0x3c net/socket.c:2774 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall+0x64/0x178 arch/arm64/kernel/syscall.c:52 el0_svc_common+0xbc/0x180 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x48/0x110 arch/arm64/kernel/syscall.c:193 el0_svc+0x58/0x14c arch/arm64/kernel/entry-common.c:637 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:655 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591 Code: 91388800 aa0903e1 f90003e8 94e6d752 (d4210000) Fixes: 6fd1d51cfa25 ("net: SO_RCVMARK socket option for SO_MARK with recvmsg()") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Erin MacNeil Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230213160059.3829741-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/socket.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 888cd618a968..c12af3c84d3a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -971,9 +971,12 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, static void sock_recv_mark(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - if (sock_flag(sk, SOCK_RCVMARK) && skb) - put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), - &skb->mark); + if (sock_flag(sk, SOCK_RCVMARK) && skb) { + /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */ + __u32 mark = skb->mark; + + put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark); + } } void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, -- cgit From 11a4d6f67cf55883dc78e31c247d1903ed7feccc Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Tue, 14 Feb 2023 01:26:06 +0000 Subject: tipc: fix kernel warning when sending SYN message When sending a SYN message, this kernel stack trace is observed: ... [ 13.396352] RIP: 0010:_copy_from_iter+0xb4/0x550 ... [ 13.398494] Call Trace: [ 13.398630] [ 13.398630] ? __alloc_skb+0xed/0x1a0 [ 13.398630] tipc_msg_build+0x12c/0x670 [tipc] [ 13.398630] ? shmem_add_to_page_cache.isra.71+0x151/0x290 [ 13.398630] __tipc_sendmsg+0x2d1/0x710 [tipc] [ 13.398630] ? tipc_connect+0x1d9/0x230 [tipc] [ 13.398630] ? __local_bh_enable_ip+0x37/0x80 [ 13.398630] tipc_connect+0x1d9/0x230 [tipc] [ 13.398630] ? __sys_connect+0x9f/0xd0 [ 13.398630] __sys_connect+0x9f/0xd0 [ 13.398630] ? preempt_count_add+0x4d/0xa0 [ 13.398630] ? fpregs_assert_state_consistent+0x22/0x50 [ 13.398630] __x64_sys_connect+0x16/0x20 [ 13.398630] do_syscall_64+0x42/0x90 [ 13.398630] entry_SYSCALL_64_after_hwframe+0x63/0xcd It is because commit a41dad905e5a ("iov_iter: saner checks for attempt to copy to/from iterator") has introduced sanity check for copying from/to iov iterator. Lacking of copy direction from the iterator viewpoint would lead to kernel stack trace like above. This commit fixes this issue by initializing the iov iterator with the correct copy direction when sending SYN or ACK without data. Fixes: f25dcc7687d4 ("tipc: tipc ->sendmsg() conversion") Reported-by: syzbot+d43608d061e8847ec9f3@syzkaller.appspotmail.com Acked-by: Jon Maloy Signed-off-by: Tung Nguyen Link: https://lore.kernel.org/r/20230214012606.5804-1-tung.q.nguyen@dektech.com.au Signed-off-by: Jakub Kicinski --- net/tipc/socket.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b35c8701876a..a38733f2197a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2614,6 +2614,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, /* Send a 'SYN-' to destination */ m.msg_name = dest; m.msg_namelen = destlen; + iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0); /* If connect is in non-blocking case, set MSG_DONTWAIT to * indicate send_msg() is never blocked. @@ -2776,6 +2777,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, __skb_queue_head(&new_sk->sk_receive_queue, buf); skb_set_owner_r(buf, new_sk); } + iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0); __tipc_sendstream(new_sock, &m, 0); release_sock(new_sk); exit: -- cgit From 42018a322bd453e38b3ffee294982243e50a484f Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Mon, 13 Feb 2023 22:47:29 -0300 Subject: net/sched: tcindex: search key must be 16 bits Syzkaller found an issue where a handle greater than 16 bits would trigger a null-ptr-deref in the imperfect hash area update. general protection fault, probably for non-canonical address 0xdffffc0000000015: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x00000000000000a8-0x00000000000000af] CPU: 0 PID: 5070 Comm: syz-executor456 Not tainted 6.2.0-rc7-syzkaller-00112-gc68f345b7c42 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023 RIP: 0010:tcindex_set_parms+0x1a6a/0x2990 net/sched/cls_tcindex.c:509 Code: 01 e9 e9 fe ff ff 4c 8b bd 28 fe ff ff e8 0e 57 7d f9 48 8d bb a8 00 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 94 0c 00 00 48 8b 85 f8 fd ff ff 48 8b 9b a8 00 RSP: 0018:ffffc90003d3ef88 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000015 RSI: ffffffff8803a102 RDI: 00000000000000a8 RBP: ffffc90003d3f1d8 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffff88801e2b10a8 R13: dffffc0000000000 R14: 0000000000030000 R15: ffff888017b3be00 FS: 00005555569af300(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000056041c6d2000 CR3: 000000002bfca000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcindex_change+0x1ea/0x320 net/sched/cls_tcindex.c:572 tc_new_tfilter+0x96e/0x2220 net/sched/cls_api.c:2155 rtnetlink_rcv_msg+0x959/0xca0 net/core/rtnetlink.c:6132 netlink_rcv_skb+0x165/0x440 net/netlink/af_netlink.c:2574 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x547/0x7f0 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x91b/0xe10 net/netlink/af_netlink.c:1942 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 ____sys_sendmsg+0x334/0x8c0 net/socket.c:2476 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2530 __sys_sendmmsg+0x18f/0x460 net/socket.c:2616 __do_sys_sendmmsg net/socket.c:2645 [inline] __se_sys_sendmmsg net/socket.c:2642 [inline] __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2642 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 Fixes: ee059170b1f7 ("net/sched: tcindex: update imperfect hash filters respecting rcu") Signed-off-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Reported-by: syzbot Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index ba7f22a49397..6640e75eaa02 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -503,7 +503,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, /* lookup the filter, guaranteed to exist */ for (cf = rcu_dereference_bh_rtnl(*fp); cf; fp = &cf->next, cf = rcu_dereference_bh_rtnl(*fp)) - if (cf->key == handle) + if (cf->key == (u16)handle) break; f->next = cf->next; -- cgit From fda6c89fe3d9aca073495a664e1d5aea28cd4377 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 13 Feb 2023 22:53:55 -0800 Subject: net: mpls: fix stale pointer if allocation fails during device rename lianhui reports that when MPLS fails to register the sysctl table under new location (during device rename) the old pointers won't get overwritten and may be freed again (double free). Handle this gracefully. The best option would be unregistering the MPLS from the device completely on failure, but unfortunately mpls_ifdown() can fail. So failing fully is also unreliable. Another option is to register the new table first then only remove old one if the new one succeeds. That requires more code, changes order of notifications and two tables may be visible at the same time. sysctl point is not used in the rest of the code - set to NULL on failures and skip unregister if already NULL. Reported-by: lianhui tang Fixes: 0fae3bf018d9 ("mpls: handle device renames for per-device sysctls") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 35b5f806fdda..dc5165d3eec4 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1428,6 +1428,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev, free: kfree(table); out: + mdev->sysctl = NULL; return -ENOBUFS; } @@ -1437,6 +1438,9 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev, struct net *net = dev_net(dev); struct ctl_table *table; + if (!mdev->sysctl) + return; + table = mdev->sysctl->ctl_table_arg; unregister_net_sysctl_table(mdev->sysctl); kfree(table); -- cgit From b20b8aec6ffc07bb547966b356780cd344f20f5b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Feb 2023 09:31:39 +0200 Subject: devlink: Fix netdev notifier chain corruption Cited commit changed devlink to register its netdev notifier block on the global netdev notifier chain instead of on the per network namespace one. However, when changing the network namespace of the devlink instance, devlink still tries to unregister its notifier block from the chain of the old namespace and register it on the chain of the new namespace. This results in corruption of the notifier chains, as the same notifier block is registered on two different chains: The global one and the per network namespace one. In turn, this causes other problems such as the inability to dismantle namespaces due to netdev reference count issues. Fix by preventing devlink from moving its notifier block between namespaces. Reproducer: # echo "10 1" > /sys/bus/netdevsim/new_device # ip netns add test123 # devlink dev reload netdevsim/netdevsim10 netns test123 # ip netns del test123 [ 71.935619] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 71.938348] leaked reference. Fixes: 565b4824c39f ("devlink: change port event netdev notifier from per-net to global") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Reviewed-by: Jacob Keller Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20230215073139.1360108-1-idosch@nvidia.com Signed-off-by: Paolo Abeni --- net/core/dev.c | 8 -------- net/core/devlink.c | 5 +---- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ea0a7bac1e5c..f23e287602b7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1869,14 +1869,6 @@ static void __move_netdevice_notifier_net(struct net *src_net, __register_netdevice_notifier_net(dst_net, nb, true); } -void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net, - struct notifier_block *nb) -{ - rtnl_lock(); - __move_netdevice_notifier_net(src_net, dst_net, nb); - rtnl_unlock(); -} - int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn) diff --git a/net/core/devlink.c b/net/core/devlink.c index 909a10e4b0dd..0bfc144df8b9 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4742,11 +4742,8 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net, if (err) return err; - if (dest_net && !net_eq(dest_net, curr_net)) { - move_netdevice_notifier_net(curr_net, dest_net, - &devlink->netdevice_nb); + if (dest_net && !net_eq(dest_net, curr_net)) write_pnet(&devlink->_net, dest_net); - } err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack); devlink_reload_failed_set(devlink, !!err); -- cgit