From b3c9e65eb227269ed72a115ba22f4f51b4e62b4d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Sep 2024 13:37:25 +0000 Subject: net: hsr: remove seqnr_lock syzbot found a new splat [1]. Instead of adding yet another spin_lock_bh(&hsr->seqnr_lock) / spin_unlock_bh(&hsr->seqnr_lock) pair, remove seqnr_lock and use atomic_t for hsr->sequence_nr and hsr->sup_sequence_nr. This also avoid a race in hsr_fill_info(). Also remove interlink_sequence_nr which is unused. [1] WARNING: CPU: 1 PID: 9723 at net/hsr/hsr_forward.c:602 handle_std_frame+0x247/0x2c0 net/hsr/hsr_forward.c:602 Modules linked in: CPU: 1 UID: 0 PID: 9723 Comm: syz.0.1657 Not tainted 6.11.0-rc6-syzkaller-00026-g88fac17500f4 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:handle_std_frame+0x247/0x2c0 net/hsr/hsr_forward.c:602 Code: 49 8d bd b0 01 00 00 be ff ff ff ff e8 e2 58 25 00 31 ff 89 c5 89 c6 e8 47 53 a8 f6 85 ed 0f 85 5a ff ff ff e8 fa 50 a8 f6 90 <0f> 0b 90 e9 4c ff ff ff e8 cc e7 06 f7 e9 8f fe ff ff e8 52 e8 06 RSP: 0018:ffffc90000598598 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffffc90000598670 RCX: ffffffff8ae2c919 RDX: ffff888024e94880 RSI: ffffffff8ae2c926 RDI: 0000000000000005 RBP: 0000000000000000 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000003 R13: ffff8880627a8cc0 R14: 0000000000000000 R15: ffff888012b03c3a FS: 0000000000000000(0000) GS:ffff88802b700000(0063) knlGS:00000000f5696b40 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: 0000000020010000 CR3: 00000000768b4000 CR4: 0000000000350ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: hsr_fill_frame_info+0x2c8/0x360 net/hsr/hsr_forward.c:630 fill_frame_info net/hsr/hsr_forward.c:700 [inline] hsr_forward_skb+0x7df/0x25c0 net/hsr/hsr_forward.c:715 hsr_handle_frame+0x603/0x850 net/hsr/hsr_slave.c:70 __netif_receive_skb_core.constprop.0+0xa3d/0x4330 net/core/dev.c:5555 __netif_receive_skb_list_core+0x357/0x950 net/core/dev.c:5737 __netif_receive_skb_list net/core/dev.c:5804 [inline] netif_receive_skb_list_internal+0x753/0xda0 net/core/dev.c:5896 gro_normal_list include/net/gro.h:515 [inline] gro_normal_list include/net/gro.h:511 [inline] napi_complete_done+0x23f/0x9a0 net/core/dev.c:6247 gro_cell_poll+0x162/0x210 net/core/gro_cells.c:66 __napi_poll.constprop.0+0xb7/0x550 net/core/dev.c:6772 napi_poll net/core/dev.c:6841 [inline] net_rx_action+0xa92/0x1010 net/core/dev.c:6963 handle_softirqs+0x216/0x8f0 kernel/softirq.c:554 do_softirq kernel/softirq.c:455 [inline] do_softirq+0xb2/0xf0 kernel/softirq.c:442 Fixes: 06afd2c31d33 ("hsr: Synchronize sending frames to have always incremented outgoing seq nr.") Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Sebastian Andrzej Siewior Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 35 ++++++++++------------------------- net/hsr/hsr_forward.c | 4 +--- net/hsr/hsr_main.h | 6 ++---- net/hsr/hsr_netlink.c | 2 +- 4 files changed, 14 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index e4cc6b78dcfc..ac56784c327c 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -231,9 +231,7 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb->dev = master->dev; skb_reset_mac_header(skb); skb_reset_mac_len(skb); - spin_lock_bh(&hsr->seqnr_lock); hsr_forward_skb(skb, master); - spin_unlock_bh(&hsr->seqnr_lock); } else { dev_core_stats_tx_dropped_inc(dev); dev_kfree_skb_any(skb); @@ -314,14 +312,10 @@ static void send_hsr_supervision_frame(struct hsr_port *port, set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version); /* From HSRv1 on we have separate supervision sequence numbers. */ - spin_lock_bh(&hsr->seqnr_lock); - if (hsr->prot_version > 0) { - hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr); - hsr->sup_sequence_nr++; - } else { - hsr_stag->sequence_nr = htons(hsr->sequence_nr); - hsr->sequence_nr++; - } + if (hsr->prot_version > 0) + hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sup_sequence_nr)); + else + hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sequence_nr)); hsr_stag->tlv.HSR_TLV_type = type; /* TODO: Why 12 in HSRv0? */ @@ -343,13 +337,11 @@ static void send_hsr_supervision_frame(struct hsr_port *port, ether_addr_copy(hsr_sp->macaddress_A, hsr->macaddress_redbox); } - if (skb_put_padto(skb, ETH_ZLEN)) { - spin_unlock_bh(&hsr->seqnr_lock); + if (skb_put_padto(skb, ETH_ZLEN)) return; - } hsr_forward_skb(skb, port); - spin_unlock_bh(&hsr->seqnr_lock); + return; } @@ -374,9 +366,7 @@ static void send_prp_supervision_frame(struct hsr_port *master, set_hsr_stag_HSR_ver(hsr_stag, (hsr->prot_version ? 1 : 0)); /* From HSRv1 on we have separate supervision sequence numbers. */ - spin_lock_bh(&hsr->seqnr_lock); - hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr); - hsr->sup_sequence_nr++; + hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sup_sequence_nr)); hsr_stag->tlv.HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD; hsr_stag->tlv.HSR_TLV_length = sizeof(struct hsr_sup_payload); @@ -384,13 +374,10 @@ static void send_prp_supervision_frame(struct hsr_port *master, hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); - if (skb_put_padto(skb, ETH_ZLEN)) { - spin_unlock_bh(&hsr->seqnr_lock); + if (skb_put_padto(skb, ETH_ZLEN)) return; - } hsr_forward_skb(skb, master); - spin_unlock_bh(&hsr->seqnr_lock); } /* Announce (supervision frame) timer function @@ -621,11 +608,9 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], if (res < 0) return res; - spin_lock_init(&hsr->seqnr_lock); /* Overflow soon to find bugs easier: */ - hsr->sequence_nr = HSR_SEQNR_START; - hsr->sup_sequence_nr = HSR_SUP_SEQNR_START; - hsr->interlink_sequence_nr = HSR_SEQNR_START; + atomic_set(&hsr->sequence_nr, HSR_SEQNR_START); + atomic_set(&hsr->sup_sequence_nr, HSR_SUP_SEQNR_START); timer_setup(&hsr->announce_timer, hsr_announce, 0); timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index b38060246e62..6f63c8a775c4 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -599,9 +599,7 @@ static void handle_std_frame(struct sk_buff *skb, if (port->type == HSR_PT_MASTER || port->type == HSR_PT_INTERLINK) { /* Sequence nr for the master/interlink node */ - lockdep_assert_held(&hsr->seqnr_lock); - frame->sequence_nr = hsr->sequence_nr; - hsr->sequence_nr++; + frame->sequence_nr = atomic_inc_return(&hsr->sequence_nr); } } diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index ab1f8d35d9dc..6f7bbf01f3e4 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -202,11 +202,9 @@ struct hsr_priv { struct timer_list prune_timer; struct timer_list prune_proxy_timer; int announce_count; - u16 sequence_nr; - u16 interlink_sequence_nr; /* Interlink port seq_nr */ - u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */ + atomic_t sequence_nr; + atomic_t sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */ enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */ - spinlock_t seqnr_lock; /* locking for sequence_nr */ spinlock_t list_lock; /* locking for node list */ struct hsr_proto_ops *proto_ops; #define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. Bit 0 is set diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index f6ff0b61e08a..8aea4ff5f49e 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -163,7 +163,7 @@ static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN, hsr->sup_multicast_addr) || - nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr->sequence_nr)) + nla_put_u16(skb, IFLA_HSR_SEQ_NR, atomic_read(&hsr->sequence_nr))) goto nla_put_failure; if (hsr->prot_version == PRP_V1) proto = HSR_PROTOCOL_PRP; -- cgit From 4c8002277167125078e6b9b90137bdf443ebaa08 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 6 Sep 2024 15:28:39 +0500 Subject: fou: fix initialization of grc The grc must be initialize first. There can be a condition where if fou is NULL, goto out will be executed and grc would be used uninitialized. Fixes: 7e4196935069 ("fou: Fix null-ptr-deref in GRO.") Signed-off-by: Muhammad Usama Anjum Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20240906102839.202798-1-usama.anjum@collabora.com Signed-off-by: Jakub Kicinski --- net/ipv4/fou_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c index 78b869b31492..3e30745e2c09 100644 --- a/net/ipv4/fou_core.c +++ b/net/ipv4/fou_core.c @@ -336,11 +336,11 @@ static struct sk_buff *gue_gro_receive(struct sock *sk, struct gro_remcsum grc; u8 proto; + skb_gro_remcsum_init(&grc); + if (!fou) goto out; - skb_gro_remcsum_init(&grc); - off = skb_gro_offset(skb); len = off + sizeof(*guehdr); -- cgit From 8b26ff7af8c32cb4148b3e147c52f9e4c695209c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Sep 2024 12:54:46 +0200 Subject: netfilter: nft_socket: fix sk refcount leaks We must put 'sk' reference before returning. Fixes: 039b1f4f24ec ("netfilter: nft_socket: fix erroneous socket assignment") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_socket.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index f30163e2ca62..765ffd6e06bc 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -110,13 +110,13 @@ static void nft_socket_eval(const struct nft_expr *expr, *dest = READ_ONCE(sk->sk_mark); } else { regs->verdict.code = NFT_BREAK; - return; + goto out_put_sk; } break; case NFT_SOCKET_WILDCARD: if (!sk_fullsock(sk)) { regs->verdict.code = NFT_BREAK; - return; + goto out_put_sk; } nft_socket_wildcard(pkt, regs, sk, dest); break; @@ -124,7 +124,7 @@ static void nft_socket_eval(const struct nft_expr *expr, case NFT_SOCKET_CGROUPV2: if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) { regs->verdict.code = NFT_BREAK; - return; + goto out_put_sk; } break; #endif @@ -133,6 +133,7 @@ static void nft_socket_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; } +out_put_sk: if (sk != skb->sk) sock_gen_put(sk); } -- cgit From 7f3287db654395f9c5ddd246325ff7889f550286 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 7 Sep 2024 16:07:49 +0200 Subject: netfilter: nft_socket: make cgroupsv2 matching work with namespaces When running in container environmment, /sys/fs/cgroup/ might not be the real root node of the sk-attached cgroup. Example: In container: % stat /sys//fs/cgroup/ Device: 0,21 Inode: 2214 .. % stat /sys/fs/cgroup/foo Device: 0,21 Inode: 2264 .. The expectation would be for: nft add rule .. socket cgroupv2 level 1 "foo" counter to match traffic from a process that got added to "foo" via "echo $pid > /sys/fs/cgroup/foo/cgroup.procs". However, 'level 3' is needed to make this work. Seen from initial namespace, the complete hierarchy is: % stat /sys/fs/cgroup/system.slice/docker-.../foo Device: 0,21 Inode: 2264 .. i.e. hierarchy is 0 1 2 3 / -> system.slice -> docker-1... -> foo ... but the container doesn't know that its "/" is the "docker-1.." cgroup. Current code will retrieve the 'system.slice' cgroup node and store its kn->id in the destination register, so compare with 2264 ("foo" cgroup id) will not match. Fetch "/" cgroup from ->init() and add its level to the level we try to extract. cgroup root-level is 0 for the init-namespace or the level of the ancestor that is exposed as the cgroup root inside the container. In the above case, cgrp->level of "/" resolved in the container is 2 (docker-1...scope/) and request for 'level 1' will get adjusted to fetch the actual level (3). v2: use CONFIG_SOCK_CGROUP_DATA, eval function depends on it. (kernel test robot) Cc: cgroups@vger.kernel.org Fixes: e0bb96db96f8 ("netfilter: nft_socket: add support for cgroupsv2") Reported-by: Nadia Pinaeva Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_socket.c | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 765ffd6e06bc..12cdff640492 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -9,7 +9,8 @@ struct nft_socket { enum nft_socket_keys key:8; - u8 level; + u8 level; /* cgroupv2 level to extract */ + u8 level_user; /* cgroupv2 level provided by userspace */ u8 len; union { u8 dreg; @@ -53,6 +54,28 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo memcpy(dest, &cgid, sizeof(u64)); return true; } + +/* process context only, uses current->nsproxy. */ +static noinline int nft_socket_cgroup_subtree_level(void) +{ + struct cgroup *cgrp = cgroup_get_from_path("/"); + int level; + + if (!cgrp) + return -ENOENT; + + level = cgrp->level; + + cgroup_put(cgrp); + + if (WARN_ON_ONCE(level > 255)) + return -ERANGE; + + if (WARN_ON_ONCE(level < 0)) + return -EINVAL; + + return level; +} #endif static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt) @@ -174,9 +197,10 @@ static int nft_socket_init(const struct nft_ctx *ctx, case NFT_SOCKET_MARK: len = sizeof(u32); break; -#ifdef CONFIG_CGROUPS +#ifdef CONFIG_SOCK_CGROUP_DATA case NFT_SOCKET_CGROUPV2: { unsigned int level; + int err; if (!tb[NFTA_SOCKET_LEVEL]) return -EINVAL; @@ -185,6 +209,17 @@ static int nft_socket_init(const struct nft_ctx *ctx, if (level > 255) return -EOPNOTSUPP; + err = nft_socket_cgroup_subtree_level(); + if (err < 0) + return err; + + priv->level_user = level; + + level += err; + /* Implies a giant cgroup tree */ + if (WARN_ON_ONCE(level > 255)) + return -EOPNOTSUPP; + priv->level = level; len = sizeof(u64); break; @@ -209,7 +244,7 @@ static int nft_socket_dump(struct sk_buff *skb, if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg)) return -1; if (priv->key == NFT_SOCKET_CGROUPV2 && - nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level))) + nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user))) return -1; return 0; } -- cgit From a7789fd4caaf96ecfed5e28c4cddb927e6bebadb Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Sun, 8 Sep 2024 04:03:41 +0900 Subject: net: hsr: prevent NULL pointer dereference in hsr_proxy_announce() In the function hsr_proxy_annouance() added in the previous commit 5f703ce5c981 ("net: hsr: Send supervisory frames to HSR network with ProxyNodeTable data"), the return value of the hsr_port_get_hsr() function is not checked to be a NULL pointer, which causes a NULL pointer dereference. To solve this, we need to add code to check whether the return value of hsr_port_get_hsr() is NULL. Reported-by: syzbot+02a42d9b1bd395cbcab4@syzkaller.appspotmail.com Fixes: 5f703ce5c981 ("net: hsr: Send supervisory frames to HSR network with ProxyNodeTable data") Signed-off-by: Jeongjun Park Reviewed-by: Simon Horman Acked-by: Lukasz Majewski Link: https://patch.msgid.link/20240907190341.162289-1-aha310510@gmail.com Signed-off-by: Jakub Kicinski --- net/hsr/hsr_device.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index ac56784c327c..049e22bdaafb 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -414,6 +414,9 @@ static void hsr_proxy_announce(struct timer_list *t) * of SAN nodes stored in ProxyNodeTable. */ interlink = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK); + if (!interlink) + goto done; + list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) { if (hsr_addr_is_redbox(hsr, node->macaddress_A)) continue; @@ -428,6 +431,7 @@ static void hsr_proxy_announce(struct timer_list *t) mod_timer(&hsr->announce_proxy_timer, jiffies + interval); } +done: rcu_read_unlock(); } -- cgit From b4cd80b0338945a94972ac3ed54f8338d2da2076 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Tue, 10 Sep 2024 17:58:56 +0800 Subject: mptcp: pm: Fix uaf in __timer_delete_sync There are two paths to access mptcp_pm_del_add_timer, result in a race condition: CPU1 CPU2 ==== ==== net_rx_action napi_poll netlink_sendmsg __napi_poll netlink_unicast process_backlog netlink_unicast_kernel __netif_receive_skb genl_rcv __netif_receive_skb_one_core netlink_rcv_skb NF_HOOK genl_rcv_msg ip_local_deliver_finish genl_family_rcv_msg ip_protocol_deliver_rcu genl_family_rcv_msg_doit tcp_v4_rcv mptcp_pm_nl_flush_addrs_doit tcp_v4_do_rcv mptcp_nl_remove_addrs_list tcp_rcv_established mptcp_pm_remove_addrs_and_subflows tcp_data_queue remove_anno_list_by_saddr mptcp_incoming_options mptcp_pm_del_add_timer mptcp_pm_del_add_timer kfree(entry) In remove_anno_list_by_saddr(running on CPU2), after leaving the critical zone protected by "pm.lock", the entry will be released, which leads to the occurrence of uaf in the mptcp_pm_del_add_timer(running on CPU1). Keeping a reference to add_timer inside the lock, and calling sk_stop_timer_sync() with this reference, instead of "entry->add_timer". Move list_del(&entry->list) to mptcp_pm_del_add_timer and inside the pm lock, do not directly access any members of the entry outside the pm lock, which can avoid similar "entry->x" uaf. Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") Cc: stable@vger.kernel.org Reported-and-tested-by: syzbot+f3a31fb909db9b2a5c4d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=f3a31fb909db9b2a5c4d Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Edward Adam Davis Acked-by: Paolo Abeni Link: https://patch.msgid.link/tencent_7142963A37944B4A74EF76CD66EA3C253609@qq.com Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index f891bc714668..ad935d34c973 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -334,15 +334,21 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, { struct mptcp_pm_add_entry *entry; struct sock *sk = (struct sock *)msk; + struct timer_list *add_timer = NULL; spin_lock_bh(&msk->pm.lock); entry = mptcp_lookup_anno_list_by_saddr(msk, addr); - if (entry && (!check_id || entry->addr.id == addr->id)) + if (entry && (!check_id || entry->addr.id == addr->id)) { entry->retrans_times = ADD_ADDR_RETRANS_MAX; + add_timer = &entry->add_timer; + } + if (!check_id && entry) + list_del(&entry->list); spin_unlock_bh(&msk->pm.lock); - if (entry && (!check_id || entry->addr.id == addr->id)) - sk_stop_timer_sync(sk, &entry->add_timer); + /* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */ + if (add_timer) + sk_stop_timer_sync(sk, add_timer); return entry; } @@ -1462,7 +1468,6 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, entry = mptcp_pm_del_add_timer(msk, addr, false); if (entry) { - list_del(&entry->list); kfree(entry); return true; } -- cgit From 3e705251d998c9688be0e7e0526c250fec24d233 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 11 Sep 2024 17:37:30 +0200 Subject: net: netfilter: move nf flowtable bpf initialization in nf_flow_table_module_init() Move nf flowtable bpf initialization in nf_flow_table module load routine since nf_flow_table_bpf is part of nf_flow_table module and not nf_flow_table_inet one. This patch allows to avoid the following kernel warning running the reproducer below: $modprobe nf_flow_table_inet $rmmod nf_flow_table_inet $modprobe nf_flow_table_inet modprobe: ERROR: could not insert 'nf_flow_table_inet': Invalid argument [ 184.081501] ------------[ cut here ]------------ [ 184.081527] WARNING: CPU: 0 PID: 1362 at kernel/bpf/btf.c:8206 btf_populate_kfunc_set+0x23c/0x330 [ 184.081550] CPU: 0 UID: 0 PID: 1362 Comm: modprobe Kdump: loaded Not tainted 6.11.0-0.rc5.22.el10.x86_64 #1 [ 184.081553] Hardware name: Red Hat OpenStack Compute, BIOS 1.14.0-1.module+el8.4.0+8855+a9e237a9 04/01/2014 [ 184.081554] RIP: 0010:btf_populate_kfunc_set+0x23c/0x330 [ 184.081558] RSP: 0018:ff22cfb38071fc90 EFLAGS: 00010202 [ 184.081559] RAX: 0000000000000001 RBX: 0000000000000001 RCX: 0000000000000000 [ 184.081560] RDX: 000000000000006e RSI: ffffffff95c00000 RDI: ff13805543436350 [ 184.081561] RBP: ffffffffc0e22180 R08: ff13805543410808 R09: 000000000001ec00 [ 184.081562] R10: ff13805541c8113c R11: 0000000000000010 R12: ff13805541b83c00 [ 184.081563] R13: ff13805543410800 R14: 0000000000000001 R15: ffffffffc0e2259a [ 184.081564] FS: 00007fa436c46740(0000) GS:ff1380557ba00000(0000) knlGS:0000000000000000 [ 184.081569] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 184.081570] CR2: 000055e7b3187000 CR3: 0000000100c48003 CR4: 0000000000771ef0 [ 184.081571] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 184.081572] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 184.081572] PKRU: 55555554 [ 184.081574] Call Trace: [ 184.081575] [ 184.081578] ? show_trace_log_lvl+0x1b0/0x2f0 [ 184.081580] ? show_trace_log_lvl+0x1b0/0x2f0 [ 184.081582] ? __register_btf_kfunc_id_set+0x199/0x200 [ 184.081585] ? btf_populate_kfunc_set+0x23c/0x330 [ 184.081586] ? __warn.cold+0x93/0xed [ 184.081590] ? btf_populate_kfunc_set+0x23c/0x330 [ 184.081592] ? report_bug+0xff/0x140 [ 184.081594] ? handle_bug+0x3a/0x70 [ 184.081596] ? exc_invalid_op+0x17/0x70 [ 184.081597] ? asm_exc_invalid_op+0x1a/0x20 [ 184.081601] ? btf_populate_kfunc_set+0x23c/0x330 [ 184.081602] __register_btf_kfunc_id_set+0x199/0x200 [ 184.081605] ? __pfx_nf_flow_inet_module_init+0x10/0x10 [nf_flow_table_inet] [ 184.081607] do_one_initcall+0x58/0x300 [ 184.081611] do_init_module+0x60/0x230 [ 184.081614] __do_sys_init_module+0x17a/0x1b0 [ 184.081617] do_syscall_64+0x7d/0x160 [ 184.081620] ? __count_memcg_events+0x58/0xf0 [ 184.081623] ? handle_mm_fault+0x234/0x350 [ 184.081626] ? do_user_addr_fault+0x347/0x640 [ 184.081630] ? clear_bhb_loop+0x25/0x80 [ 184.081633] ? clear_bhb_loop+0x25/0x80 [ 184.081634] ? clear_bhb_loop+0x25/0x80 [ 184.081637] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 184.081639] RIP: 0033:0x7fa43652e4ce [ 184.081647] RSP: 002b:00007ffe8213be18 EFLAGS: 00000246 ORIG_RAX: 00000000000000af [ 184.081649] RAX: ffffffffffffffda RBX: 000055e7b3176c20 RCX: 00007fa43652e4ce [ 184.081650] RDX: 000055e7737fde79 RSI: 0000000000003990 RDI: 000055e7b3185380 [ 184.081651] RBP: 000055e7737fde79 R08: 0000000000000007 R09: 000055e7b3179bd0 [ 184.081651] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000040000 [ 184.081652] R13: 000055e7b3176fa0 R14: 0000000000000000 R15: 000055e7b3179b80 Fixes: 391bb6594fd3 ("netfilter: Add bpf_xdp_flow_lookup kfunc") Signed-off-by: Lorenzo Bianconi Acked-by: Florian Westphal Acked-by: Pablo Neira Ayuso Link: https://patch.msgid.link/20240911-nf-flowtable-bpf-modprob-fix-v1-1-f9fc075aafc3@kernel.org Signed-off-by: Paolo Abeni --- net/netfilter/nf_flow_table_core.c | 6 ++++++ net/netfilter/nf_flow_table_inet.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 5c1ff07eaee0..df72b0376970 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -670,8 +670,14 @@ static int __init nf_flow_table_module_init(void) if (ret) goto out_offload; + ret = nf_flow_register_bpf(); + if (ret) + goto out_bpf; + return 0; +out_bpf: + nf_flow_table_offload_exit(); out_offload: unregister_pernet_subsys(&nf_flow_table_net_ops); return ret; diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 8b541a080342..b0f199171932 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -101,7 +101,7 @@ static int __init nf_flow_inet_module_init(void) nft_register_flowtable_type(&flowtable_ipv6); nft_register_flowtable_type(&flowtable_inet); - return nf_flow_register_bpf(); + return 0; } static void __exit nf_flow_inet_module_exit(void) -- cgit