summaryrefslogtreecommitdiff
path: root/net/bridge/br_switchdev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/bridge/br_switchdev.c')
-rw-r--r--net/bridge/br_switchdev.c138
1 files changed, 98 insertions, 40 deletions
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 7eb6fd5bb917..fe3f7bbe86ee 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -17,6 +17,9 @@ static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p,
if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
return false;
+ if (br_multicast_igmp_type(skb))
+ return false;
+
return (p->flags & BR_TX_FWD_OFFLOAD) &&
(p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom);
}
@@ -104,9 +107,8 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
return 0;
if (err) {
- if (extack && !extack->_msg)
- NL_SET_ERR_MSG_MOD(extack,
- "bridge flag offload is not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "bridge flag offload is not supported");
return -EOPNOTSUPP;
}
@@ -115,9 +117,8 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
err = switchdev_port_attr_set(p->dev, &attr, extack);
if (err) {
- if (extack && !extack->_msg)
- NL_SET_ERR_MSG_MOD(extack,
- "error setting offload flag on port");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "error setting offload flag on port");
return err;
}
@@ -150,6 +151,17 @@ br_switchdev_fdb_notify(struct net_bridge *br,
if (test_bit(BR_FDB_LOCKED, &fdb->flags))
return;
+ /* Entries with these flags were created using ndm_state == NUD_REACHABLE,
+ * ndm_flags == NTF_MASTER( | NTF_STICKY), ext_flags == 0 by something
+ * equivalent to 'bridge fdb add ... master dynamic (sticky)'.
+ * Drivers don't know how to deal with these, so don't notify them to
+ * avoid confusing them.
+ */
+ if (test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags) &&
+ !test_bit(BR_FDB_STATIC, &fdb->flags) &&
+ !test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
+ return;
+
br_switchdev_fdb_populate(br, &item, fdb, NULL);
switch (type) {
@@ -495,9 +507,10 @@ static void br_switchdev_mdb_complete(struct net_device *dev, int err, void *pri
struct net_bridge_mdb_entry *mp;
struct net_bridge_port *port = data->port;
struct net_bridge *br = port->br;
+ u8 old_flags;
- if (err)
- goto err;
+ if (err == -EOPNOTSUPP)
+ goto out_free;
spin_lock_bh(&br->multicast_lock);
mp = br_mdb_ip_get(br, &data->ip);
@@ -507,11 +520,15 @@ static void br_switchdev_mdb_complete(struct net_device *dev, int err, void *pri
pp = &p->next) {
if (p->key.port != port)
continue;
- p->flags |= MDB_PG_FLAGS_OFFLOAD;
+
+ old_flags = p->flags;
+ br_multicast_set_pg_offload_flags(p, !err);
+ if (br_mdb_should_notify(br, old_flags ^ p->flags))
+ br_mdb_flag_change_notify(br->dev, mp, p);
}
out:
spin_unlock_bh(&br->multicast_lock);
-err:
+out_free:
kfree(priv);
}
@@ -586,21 +603,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
}
static int br_switchdev_mdb_queue_one(struct list_head *mdb_list,
+ struct net_device *dev,
+ unsigned long action,
enum switchdev_obj_id id,
const struct net_bridge_mdb_entry *mp,
struct net_device *orig_dev)
{
- struct switchdev_obj_port_mdb *mdb;
+ struct switchdev_obj_port_mdb mdb = {
+ .obj = {
+ .id = id,
+ .orig_dev = orig_dev,
+ },
+ };
+ struct switchdev_obj_port_mdb *pmdb;
- mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
- if (!mdb)
- return -ENOMEM;
+ br_switchdev_mdb_populate(&mdb, mp);
- mdb->obj.id = id;
- mdb->obj.orig_dev = orig_dev;
- br_switchdev_mdb_populate(mdb, mp);
- list_add_tail(&mdb->obj.list, mdb_list);
+ if (action == SWITCHDEV_PORT_OBJ_ADD &&
+ switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) {
+ /* This event is already in the deferred queue of
+ * events, so this replay must be elided, lest the
+ * driver receives duplicate events for it. This can
+ * only happen when replaying additions, since
+ * modifications are always immediately visible in
+ * br->mdb_list, whereas actual event delivery may be
+ * delayed.
+ */
+ return 0;
+ }
+ pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC);
+ if (!pmdb)
+ return -ENOMEM;
+
+ list_add_tail(&pmdb->obj.list, mdb_list);
return 0;
}
@@ -668,56 +704,57 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return 0;
- /* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
- * because the write-side protection is br->multicast_lock. But we
- * need to emulate the [ blocking ] calling context of a regular
- * switchdev event, so since both br->multicast_lock and RCU read side
- * critical sections are atomic, we have no choice but to pick the RCU
- * read side lock, queue up all our events, leave the critical section
- * and notify switchdev from blocking context.
+ if (adding)
+ action = SWITCHDEV_PORT_OBJ_ADD;
+ else
+ action = SWITCHDEV_PORT_OBJ_DEL;
+
+ /* br_switchdev_mdb_queue_one() will take care to not queue a
+ * replay of an event that is already pending in the switchdev
+ * deferred queue. In order to safely determine that, there
+ * must be no new deferred MDB notifications enqueued for the
+ * duration of the MDB scan. Therefore, grab the write-side
+ * lock to avoid racing with any concurrent IGMP/MLD snooping.
*/
- rcu_read_lock();
+ spin_lock_bh(&br->multicast_lock);
- hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
+ hlist_for_each_entry(mp, &br->mdb_list, mdb_node) {
struct net_bridge_port_group __rcu * const *pp;
const struct net_bridge_port_group *p;
if (mp->host_joined) {
- err = br_switchdev_mdb_queue_one(&mdb_list,
+ err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
SWITCHDEV_OBJ_ID_HOST_MDB,
mp, br_dev);
if (err) {
- rcu_read_unlock();
+ spin_unlock_bh(&br->multicast_lock);
goto out_free_mdb;
}
}
- for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+ for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (p->key.port->dev != dev)
continue;
- err = br_switchdev_mdb_queue_one(&mdb_list,
+ err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
SWITCHDEV_OBJ_ID_PORT_MDB,
mp, dev);
if (err) {
- rcu_read_unlock();
+ spin_unlock_bh(&br->multicast_lock);
goto out_free_mdb;
}
}
}
- rcu_read_unlock();
-
- if (adding)
- action = SWITCHDEV_PORT_OBJ_ADD;
- else
- action = SWITCHDEV_PORT_OBJ_DEL;
+ spin_unlock_bh(&br->multicast_lock);
list_for_each_entry(obj, &mdb_list, list) {
err = br_switchdev_mdb_replay_one(nb, dev,
SWITCHDEV_OBJ_PORT_MDB(obj),
action, ctx, extack);
+ if (err == -EOPNOTSUPP)
+ err = 0;
if (err)
goto out_free_mdb;
}
@@ -750,8 +787,10 @@ static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
err = br_switchdev_mdb_replay(br_dev, dev, ctx, true, blocking_nb,
extack);
- if (err && err != -EOPNOTSUPP)
+ if (err) {
+ /* -EOPNOTSUPP not propagated from MDB replay. */
return err;
+ }
err = br_switchdev_fdb_replay(br_dev, ctx, true, atomic_nb);
if (err && err != -EOPNOTSUPP)
@@ -773,6 +812,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL);
+
+ /* Make sure that the device leaving this bridge has seen all
+ * relevant events before it is disassociated. In the normal
+ * case, when the device is directly attached to the bridge,
+ * this is covered by del_nbp(). If the association was indirect
+ * however, e.g. via a team or bond, and the device is leaving
+ * that intermediate device, then the bridge port remains in
+ * place.
+ */
+ switchdev_deferred_process();
}
/* Let the bridge know that this port is offloaded, so that it can assign a
@@ -788,7 +837,7 @@ int br_switchdev_port_offload(struct net_bridge_port *p,
struct netdev_phys_item_id ppid;
int err;
- err = dev_get_port_parent_id(dev, &ppid, false);
+ err = netif_get_port_parent_id(dev, &ppid, false);
if (err)
return err;
@@ -816,3 +865,12 @@ void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
nbp_switchdev_del(p);
}
+
+int br_switchdev_port_replay(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ return nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack);
+}