diff options
author | Jakub Kicinski <kuba@kernel.org> | 2023-10-17 17:39:03 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2023-10-17 17:39:04 -0700 |
commit | 99e79b677b9a4542260c6244982d0b5d12354983 (patch) | |
tree | 9e4a3066489fd1bdac504cbc689c85dd1c3239fb | |
parent | 56a7bb12c78ffa1b02e154b1d779ed2a1555fa3c (diff) | |
parent | 6f84090333bbff3473235a13eb99c308a53e3725 (diff) |
Merge branch 'bridge-add-a-limit-on-learned-fdb-entries'
Johannes Nixdorf says:
====================
bridge: Add a limit on learned FDB entries
Introduce a limit on the amount of learned FDB entries on a bridge,
configured by netlink with a build time default on bridge creation in
the kernel config.
For backwards compatibility the kernel config default is disabling the
limit (0).
Without any limit a malicious actor may OOM a kernel by spamming packets
with changing MAC addresses on their bridge port, so allow the bridge
creator to limit the number of entries.
Currently the manual entries are identified by the bridge flags
BR_FDB_LOCAL or BR_FDB_ADDED_BY_USER, atomically bundled under the new
flag BR_FDB_DYNAMIC_LEARNED. This means the limit also applies to
entries created with BR_FDB_ADDED_BY_EXT_LEARN but none of BR_FDB_LOCAL
or BR_FDB_ADDED_BY_USER, e.g. ones added by SWITCHDEV_FDB_ADD_TO_BRIDGE.
Link to the corresponding iproute2 changes:
https://lore.kernel.org/r/20230919-fdb_limit-v4-1-b4d2dc4df30f@avm.de
v4: https://lore.kernel.org/r/20230919-fdb_limit-v4-0-39f0293807b8@avm.de/
v3: https://lore.kernel.org/r/20230905-fdb_limit-v3-0-7597cd500a82@avm.de/
v2: https://lore.kernel.org/netdev/20230619071444.14625-1-jnixdorf-oss@avm.de/
v1: https://lore.kernel.org/netdev/20230515085046.4457-1-jnixdorf-oss@avm.de/
====================
Link: https://lore.kernel.org/r/20231016-fdb_limit-v5-0-32cddff87758@avm.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | include/uapi/linux/if_link.h | 2 | ||||
-rw-r--r-- | net/bridge/br_fdb.c | 42 | ||||
-rw-r--r-- | net/bridge/br_netlink.c | 17 | ||||
-rw-r--r-- | net/bridge/br_private.h | 4 | ||||
-rw-r--r-- | tools/testing/selftests/net/forwarding/Makefile | 3 | ||||
-rwxr-xr-x | tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh | 283 |
6 files changed, 344 insertions, 7 deletions
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index fac351a93aed..9f8a3da0f14f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -510,6 +510,8 @@ enum { IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, IFLA_BR_MCAST_QUERIER_STATE, + IFLA_BR_FDB_N_LEARNED, + IFLA_BR_FDB_MAX_LEARNED, __IFLA_BR_MAX, }; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index a98ad763b368..c622de5eccd0 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -329,11 +329,18 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f, hlist_del_init_rcu(&f->fdb_node); rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode, br_fdb_rht_params); + if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &f->flags)) + atomic_dec(&br->fdb_n_learned); fdb_notify(br, f, RTM_DELNEIGH, swdev_notify); call_rcu(&f->rcu, fdb_rcu_free); } -/* Delete a local entry if no other port had the same address. */ +/* Delete a local entry if no other port had the same address. + * + * This function should only be called on entries with BR_FDB_LOCAL set, + * so even with BR_FDB_ADDED_BY_USER cleared we never need to increase + * the accounting for dynamically learned entries again. + */ static void fdb_delete_local(struct net_bridge *br, const struct net_bridge_port *p, struct net_bridge_fdb_entry *f) @@ -388,9 +395,20 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br, __u16 vid, unsigned long flags) { + bool learned = !test_bit(BR_FDB_ADDED_BY_USER, &flags) && + !test_bit(BR_FDB_LOCAL, &flags); + u32 max_learned = READ_ONCE(br->fdb_max_learned); struct net_bridge_fdb_entry *fdb; int err; + if (likely(learned)) { + int n_learned = atomic_read(&br->fdb_n_learned); + + if (unlikely(max_learned && n_learned >= max_learned)) + return NULL; + __set_bit(BR_FDB_DYNAMIC_LEARNED, &flags); + } + fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC); if (!fdb) return NULL; @@ -407,6 +425,9 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br, return NULL; } + if (likely(learned)) + atomic_inc(&br->fdb_n_learned); + hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list); return fdb; @@ -912,8 +933,12 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, clear_bit(BR_FDB_LOCKED, &fdb->flags); } - if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags))) + if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags))) { set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); + if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, + &fdb->flags)) + atomic_dec(&br->fdb_n_learned); + } if (unlikely(fdb_modified)) { trace_br_fdb_update(br, source, addr, vid, flags); fdb_notify(br, fdb, RTM_NEWNEIGH, true); @@ -1075,7 +1100,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, if (!(flags & NLM_F_CREATE)) return -ENOENT; - fdb = fdb_create(br, source, addr, vid, 0); + fdb = fdb_create(br, source, addr, vid, + BIT(BR_FDB_ADDED_BY_USER)); if (!fdb) return -ENOMEM; @@ -1088,6 +1114,10 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, WRITE_ONCE(fdb->dst, source); modified = true; } + + set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); + if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags)) + atomic_dec(&br->fdb_n_learned); } if (fdb_to_nud(br, fdb) != state) { @@ -1119,8 +1149,6 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, if (fdb_handle_notify(fdb, notify)) modified = true; - set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); - fdb->used = jiffies; if (modified) { if (refresh) @@ -1464,6 +1492,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, if (!p) set_bit(BR_FDB_LOCAL, &fdb->flags); + if ((swdev_notify || !p) && + test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags)) + atomic_dec(&br->fdb_n_learned); + if (modified) fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 10f0d33d8ccf..5ad4abfcb7ba 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1229,6 +1229,8 @@ static size_t br_port_get_slave_size(const struct net_device *brdev, } static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { + [IFLA_BR_UNSPEC] = { .strict_start_type = + IFLA_BR_FDB_N_LEARNED }, [IFLA_BR_FORWARD_DELAY] = { .type = NLA_U32 }, [IFLA_BR_HELLO_TIME] = { .type = NLA_U32 }, [IFLA_BR_MAX_AGE] = { .type = NLA_U32 }, @@ -1265,6 +1267,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_VLAN_STATS_PER_PORT] = { .type = NLA_U8 }, [IFLA_BR_MULTI_BOOLOPT] = NLA_POLICY_EXACT_LEN(sizeof(struct br_boolopt_multi)), + [IFLA_BR_FDB_N_LEARNED] = { .type = NLA_REJECT }, + [IFLA_BR_FDB_MAX_LEARNED] = { .type = NLA_U32 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -1539,6 +1543,12 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return err; } + if (data[IFLA_BR_FDB_MAX_LEARNED]) { + u32 val = nla_get_u32(data[IFLA_BR_FDB_MAX_LEARNED]); + + WRITE_ONCE(br->fdb_max_learned, val); + } + return 0; } @@ -1593,6 +1603,8 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_TOPOLOGY_CHANGE_TIMER */ nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_GC_TIMER */ nla_total_size(ETH_ALEN) + /* IFLA_BR_GROUP_ADDR */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_FDB_N_LEARNED */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_FDB_MAX_LEARNED */ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_ROUTER */ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_SNOOPING */ @@ -1668,7 +1680,10 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE_DETECTED, br->topology_change_detected) || nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr) || - nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm)) + nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm) || + nla_put_u32(skb, IFLA_BR_FDB_N_LEARNED, + atomic_read(&br->fdb_n_learned)) || + nla_put_u32(skb, IFLA_BR_FDB_MAX_LEARNED, br->fdb_max_learned)) return -EMSGSIZE; #ifdef CONFIG_BRIDGE_VLAN_FILTERING diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index cbbe35278459..27a7a06660f3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -274,6 +274,7 @@ enum { BR_FDB_NOTIFY, BR_FDB_NOTIFY_INACTIVE, BR_FDB_LOCKED, + BR_FDB_DYNAMIC_LEARNED, }; struct net_bridge_fdb_key { @@ -555,6 +556,9 @@ struct net_bridge { struct kobject *ifobj; u32 auto_cnt; + atomic_t fdb_n_learned; + u32 fdb_max_learned; + #ifdef CONFIG_NET_SWITCHDEV /* Counter used to make sure that hardware domains get unique * identifiers in case a bridge spans multiple switchdev instances. diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 74e754e266c3..df593b7b3e6b 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = bridge_igmp.sh \ +TEST_PROGS = bridge_fdb_learning_limit.sh \ + bridge_igmp.sh \ bridge_locked_port.sh \ bridge_mdb.sh \ bridge_mdb_host.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh new file mode 100755 index 000000000000..0760a34b7114 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh @@ -0,0 +1,283 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# ShellCheck incorrectly believes that most of the code here is unreachable +# because it's invoked by variable name following ALL_TESTS. +# +# shellcheck disable=SC2317 + +ALL_TESTS="check_accounting check_limit" +NUM_NETIFS=6 +source lib.sh + +TEST_MAC_BASE=de:ad:be:ef:42: + +NUM_PKTS=16 +FDB_LIMIT=8 + +FDB_TYPES=( + # name is counted? overrides learned? + 'learned 1 0' + 'static 0 1' + 'user 0 1' + 'extern_learn 0 1' + 'local 0 1' +) + +mac() +{ + printf "${TEST_MAC_BASE}%02x" "$1" +} + +H1_DEFAULT_MAC=$(mac 42) + +switch_create() +{ + ip link add dev br0 type bridge + + ip link set dev "$swp1" master br0 + ip link set dev "$swp2" master br0 + # swp3 is used to add local MACs, so do not add it to the bridge yet. + + # swp2 is only used for replying when learning on swp1, its MAC should not be learned. + ip link set dev "$swp2" type bridge_slave learning off + + ip link set dev br0 up + + ip link set dev "$swp1" up + ip link set dev "$swp2" up + ip link set dev "$swp3" up +} + +switch_destroy() +{ + ip link set dev "$swp3" down + ip link set dev "$swp2" down + ip link set dev "$swp1" down + + ip link del dev br0 +} + +h_create() +{ + ip link set "$h1" addr "$H1_DEFAULT_MAC" + + simple_if_init "$h1" 192.0.2.1/24 + simple_if_init "$h2" 192.0.2.2/24 +} + +h_destroy() +{ + simple_if_fini "$h1" 192.0.2.1/24 + simple_if_fini "$h2" 192.0.2.2/24 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + h2=${NETIFS[p3]} + swp2=${NETIFS[p4]} + + swp3=${NETIFS[p6]} + + vrf_prepare + + h_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h_destroy + + vrf_cleanup +} + +fdb_get_n_learned() +{ + ip -d -j link show dev br0 type bridge | \ + jq '.[]["linkinfo"]["info_data"]["fdb_n_learned"]' +} + +fdb_get_n_mac() +{ + local mac=${1} + + bridge -j fdb show br br0 | \ + jq "map(select(.mac == \"${mac}\" and (has(\"vlan\") | not))) | length" +} + +fdb_fill_learned() +{ + local i + + for i in $(seq 1 "$NUM_PKTS"); do + fdb_add learned "$(mac "$i")" + done +} + +fdb_reset() +{ + bridge fdb flush dev br0 + + # Keep the default MAC address of h1 in the table. We set it to a different one when + # testing dynamic learning. + bridge fdb add "$H1_DEFAULT_MAC" dev "$swp1" master static use +} + +fdb_add() +{ + local type=$1 mac=$2 + + case "$type" in + learned) + ip link set "$h1" addr "$mac" + # Wait for a reply so we implicitly wait until after the forwarding + # code finished and the FDB entry was created. + PING_COUNT=1 ping_do "$h1" 192.0.2.2 + check_err $? "Failed to ping another bridge port" + ip link set "$h1" addr "$H1_DEFAULT_MAC" + ;; + local) + ip link set dev "$swp3" addr "$mac" && ip link set "$swp3" master br0 + ;; + static) + bridge fdb replace "$mac" dev "$swp1" master static + ;; + user) + bridge fdb replace "$mac" dev "$swp1" master static use + ;; + extern_learn) + bridge fdb replace "$mac" dev "$swp1" master extern_learn + ;; + esac + + check_err $? "Failed to add a FDB entry of type ${type}" +} + +fdb_del() +{ + local type=$1 mac=$2 + + case "$type" in + local) + ip link set "$swp3" nomaster + ;; + *) + bridge fdb del "$mac" dev "$swp1" master + ;; + esac + + check_err $? "Failed to remove a FDB entry of type ${type}" +} + +check_accounting_one_type() +{ + local type=$1 is_counted=$2 overrides_learned=$3 + shift 3 + RET=0 + + fdb_reset + fdb_add "$type" "$(mac 0)" + learned=$(fdb_get_n_learned) + [ "$learned" -ne "$is_counted" ] + check_fail $? "Inserted FDB type ${type}: Expected the count ${is_counted}, but got ${learned}" + + fdb_del "$type" "$(mac 0)" + learned=$(fdb_get_n_learned) + [ "$learned" -ne 0 ] + check_fail $? "Removed FDB type ${type}: Expected the count 0, but got ${learned}" + + if [ "$overrides_learned" -eq 1 ]; then + fdb_reset + fdb_add learned "$(mac 0)" + fdb_add "$type" "$(mac 0)" + learned=$(fdb_get_n_learned) + [ "$learned" -ne "$is_counted" ] + check_fail $? "Set a learned entry to FDB type ${type}: Expected the count ${is_counted}, but got ${learned}" + fdb_del "$type" "$(mac 0)" + fi + + log_test "FDB accounting interacting with FDB type ${type}" +} + +check_accounting() +{ + local type_args learned + RET=0 + + fdb_reset + learned=$(fdb_get_n_learned) + [ "$learned" -ne 0 ] + check_fail $? "Flushed the FDB table: Expected the count 0, but got ${learned}" + + fdb_fill_learned + sleep 1 + + learned=$(fdb_get_n_learned) + [ "$learned" -ne "$NUM_PKTS" ] + check_fail $? "Filled the FDB table: Expected the count ${NUM_PKTS}, but got ${learned}" + + log_test "FDB accounting" + + for type_args in "${FDB_TYPES[@]}"; do + # This is intentional use of word splitting. + # shellcheck disable=SC2086 + check_accounting_one_type $type_args + done +} + +check_limit_one_type() +{ + local type=$1 is_counted=$2 + local n_mac expected=$((1 - is_counted)) + RET=0 + + fdb_reset + fdb_fill_learned + + fdb_add "$type" "$(mac 0)" + n_mac=$(fdb_get_n_mac "$(mac 0)") + [ "$n_mac" -ne "$expected" ] + check_fail $? "Inserted FDB type ${type} at limit: Expected the count ${expected}, but got ${n_mac}" + + log_test "FDB limits interacting with FDB type ${type}" +} + +check_limit() +{ + local learned + RET=0 + + ip link set br0 type bridge fdb_max_learned "$FDB_LIMIT" + + fdb_reset + fdb_fill_learned + + learned=$(fdb_get_n_learned) + [ "$learned" -ne "$FDB_LIMIT" ] + check_fail $? "Filled the limited FDB table: Expected the count ${FDB_LIMIT}, but got ${learned}" + + log_test "FDB limits" + + for type_args in "${FDB_TYPES[@]}"; do + # This is intentional use of word splitting. + # shellcheck disable=SC2086 + check_limit_one_type $type_args + done +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS |