diff options
author | Paolo Abeni <pabeni@redhat.com> | 2024-12-12 13:11:38 +0100 |
---|---|---|
committer | Paolo Abeni <pabeni@redhat.com> | 2024-12-12 13:11:38 +0100 |
commit | 3d64c3d3c6d8c740a310d40ec1faaab59dde81ee (patch) | |
tree | d0fe553bd1a0acafd194f6bd686b9fae500c2e64 | |
parent | 98712844589e06d9aa305b5077169942139fd75c (diff) | |
parent | b04df3da1b5c6f6dc7cdccc37941740c078c4043 (diff) |
Merge tag 'nf-24-12-11' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
Pablo Neira Ayuso says:
====================
Netfilter fixes for net
The following patchset contains Netfilter fixes for net:
1) Fix bogus test reports in rpath.sh selftest by adding permanent
neighbor entries, from Phil Sutter.
2) Lockdep reports possible ABBA deadlock in xt_IDLETIMER, fix it by
removing sysfs out of the mutex section, also from Phil Sutter.
3) It is illegal to release basechain via RCU callback, for several
reasons. Keep it simple and safe by calling synchronize_rcu() instead.
This is a partially reverting a botched recent attempt of me to fix
this basechain release path on netdevice removal.
From Florian Westphal.
netfilter pull request 24-12-11
* tag 'nf-24-12-11' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
netfilter: nf_tables: do not defer rule destruction via call_rcu
netfilter: IDLETIMER: Fix for possible ABBA deadlock
selftests: netfilter: Stabilize rpath.sh
====================
Link: https://patch.msgid.link/20241211230130.176937-1-pablo@netfilter.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-rw-r--r-- | include/net/netfilter/nf_tables.h | 4 | ||||
-rw-r--r-- | net/netfilter/nf_tables_api.c | 32 | ||||
-rw-r--r-- | net/netfilter/xt_IDLETIMER.c | 52 | ||||
-rwxr-xr-x | tools/testing/selftests/net/netfilter/rpath.sh | 18 |
4 files changed, 59 insertions, 47 deletions
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 80a537ac26cd..4afa64c81304 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1103,7 +1103,6 @@ struct nft_rule_blob { * @name: name of the chain * @udlen: user data length * @udata: user data in the chain - * @rcu_head: rcu head for deferred release * @blob_next: rule blob pointer to the next in the chain */ struct nft_chain { @@ -1121,7 +1120,6 @@ struct nft_chain { char *name; u16 udlen; u8 *udata; - struct rcu_head rcu_head; /* Only used during control plane commit phase: */ struct nft_rule_blob *blob_next; @@ -1265,7 +1263,6 @@ static inline void nft_use_inc_restore(u32 *use) * @sets: sets in the table * @objects: stateful objects in the table * @flowtables: flow tables in the table - * @net: netnamespace this table belongs to * @hgenerator: handle generator state * @handle: table handle * @use: number of chain references to this table @@ -1285,7 +1282,6 @@ struct nft_table { struct list_head sets; struct list_head objects; struct list_head flowtables; - possible_net_t net; u64 hgenerator; u64 handle; u32 use; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 21b6f7410a1f..0b9f1e8dfe49 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1596,7 +1596,6 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, INIT_LIST_HEAD(&table->sets); INIT_LIST_HEAD(&table->objects); INIT_LIST_HEAD(&table->flowtables); - write_pnet(&table->net, net); table->family = family; table->flags = flags; table->handle = ++nft_net->table_handle; @@ -3987,8 +3986,11 @@ void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) kfree(rule); } +/* can only be used if rule is no longer visible to dumps */ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { + lockdep_commit_lock_is_held(ctx->net); + nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); } @@ -5757,6 +5759,8 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase) { + lockdep_commit_lock_is_held(ctx->net); + switch (phase) { case NFT_TRANS_PREPARE_ERROR: nft_set_trans_unbind(ctx, set); @@ -11695,19 +11699,6 @@ static void __nft_release_basechain_now(struct nft_ctx *ctx) nf_tables_chain_destroy(ctx->chain); } -static void nft_release_basechain_rcu(struct rcu_head *head) -{ - struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head); - struct nft_ctx ctx = { - .family = chain->table->family, - .chain = chain, - .net = read_pnet(&chain->table->net), - }; - - __nft_release_basechain_now(&ctx); - put_net(ctx.net); -} - int __nft_release_basechain(struct nft_ctx *ctx) { struct nft_rule *rule; @@ -11722,11 +11713,18 @@ int __nft_release_basechain(struct nft_ctx *ctx) nft_chain_del(ctx->chain); nft_use_dec(&ctx->table->use); - if (maybe_get_net(ctx->net)) - call_rcu(&ctx->chain->rcu_head, nft_release_basechain_rcu); - else + if (!maybe_get_net(ctx->net)) { __nft_release_basechain_now(ctx); + return 0; + } + + /* wait for ruleset dumps to complete. Owning chain is no longer in + * lists, so new dumps can't find any of these rules anymore. + */ + synchronize_rcu(); + __nft_release_basechain_now(ctx); + put_net(ctx->net); return 0; } EXPORT_SYMBOL_GPL(__nft_release_basechain); diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 85f017e37cfc..9f54819eb52c 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -407,21 +407,23 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) mutex_lock(&list_mutex); - if (--info->timer->refcnt == 0) { - pr_debug("deleting timer %s\n", info->label); - - list_del(&info->timer->entry); - timer_shutdown_sync(&info->timer->timer); - cancel_work_sync(&info->timer->work); - sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); - kfree(info->timer->attr.attr.name); - kfree(info->timer); - } else { + if (--info->timer->refcnt > 0) { pr_debug("decreased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); + mutex_unlock(&list_mutex); + return; } + pr_debug("deleting timer %s\n", info->label); + + list_del(&info->timer->entry); mutex_unlock(&list_mutex); + + timer_shutdown_sync(&info->timer->timer); + cancel_work_sync(&info->timer->work); + sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); + kfree(info->timer->attr.attr.name); + kfree(info->timer); } static void idletimer_tg_destroy_v1(const struct xt_tgdtor_param *par) @@ -432,25 +434,27 @@ static void idletimer_tg_destroy_v1(const struct xt_tgdtor_param *par) mutex_lock(&list_mutex); - if (--info->timer->refcnt == 0) { - pr_debug("deleting timer %s\n", info->label); - - list_del(&info->timer->entry); - if (info->timer->timer_type & XT_IDLETIMER_ALARM) { - alarm_cancel(&info->timer->alarm); - } else { - timer_shutdown_sync(&info->timer->timer); - } - cancel_work_sync(&info->timer->work); - sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); - kfree(info->timer->attr.attr.name); - kfree(info->timer); - } else { + if (--info->timer->refcnt > 0) { pr_debug("decreased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); + mutex_unlock(&list_mutex); + return; } + pr_debug("deleting timer %s\n", info->label); + + list_del(&info->timer->entry); mutex_unlock(&list_mutex); + + if (info->timer->timer_type & XT_IDLETIMER_ALARM) { + alarm_cancel(&info->timer->alarm); + } else { + timer_shutdown_sync(&info->timer->timer); + } + cancel_work_sync(&info->timer->work); + sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); + kfree(info->timer->attr.attr.name); + kfree(info->timer); } diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh index 4485fd7675ed..86ec4e68594d 100755 --- a/tools/testing/selftests/net/netfilter/rpath.sh +++ b/tools/testing/selftests/net/netfilter/rpath.sh @@ -61,9 +61,20 @@ ip -net "$ns2" a a 192.168.42.1/24 dev d0 ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad +# avoid neighbor lookups and enable martian IPv6 pings +ns2_hwaddr=$(ip -net "$ns2" link show dev v0 | \ + sed -n 's, *link/ether \([^ ]*\) .*,\1,p') +ns1_hwaddr=$(ip -net "$ns1" link show dev v0 | \ + sed -n 's, *link/ether \([^ ]*\) .*,\1,p') +ip -net "$ns1" neigh add fec0:42::1 lladdr "$ns2_hwaddr" nud permanent dev v0 +ip -net "$ns1" neigh add fec0:23::1 lladdr "$ns2_hwaddr" nud permanent dev v0 +ip -net "$ns2" neigh add fec0:42::2 lladdr "$ns1_hwaddr" nud permanent dev d0 +ip -net "$ns2" neigh add fec0:23::2 lladdr "$ns1_hwaddr" nud permanent dev v0 + # firewall matches to test [ -n "$iptables" ] && { common='-t raw -A PREROUTING -s 192.168.0.0/16' + common+=' -p icmp --icmp-type echo-request' if ! ip netns exec "$ns2" "$iptables" $common -m rpfilter;then echo "Cannot add rpfilter rule" exit $ksft_skip @@ -72,6 +83,7 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad } [ -n "$ip6tables" ] && { common='-t raw -A PREROUTING -s fec0::/16' + common+=' -p icmpv6 --icmpv6-type echo-request' if ! ip netns exec "$ns2" "$ip6tables" $common -m rpfilter;then echo "Cannot add rpfilter rule" exit $ksft_skip @@ -82,8 +94,10 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad table inet t { chain c { type filter hook prerouting priority raw; - ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter - ip6 saddr fec0::/16 fib saddr . iif oif exists counter + ip saddr 192.168.0.0/16 icmp type echo-request \ + fib saddr . iif oif exists counter + ip6 saddr fec0::/16 icmpv6 type echo-request \ + fib saddr . iif oif exists counter } } EOF |