summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/net/switchdev.h3
-rw-r--r--net/bridge/br_switchdev.c74
-rw-r--r--net/switchdev/switchdev.c73
3 files changed, 122 insertions, 28 deletions
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index a43062d4c734..8346b0d29542 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -308,6 +308,9 @@ void switchdev_deferred_process(void);
int switchdev_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack);
+bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
+ enum switchdev_notifier_type nt,
+ const struct switchdev_obj *obj);
int switchdev_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack);
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index ee84e783e1df..6a7cb01f121c 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -595,21 +595,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
}
static int br_switchdev_mdb_queue_one(struct list_head *mdb_list,
+ struct net_device *dev,
+ unsigned long action,
enum switchdev_obj_id id,
const struct net_bridge_mdb_entry *mp,
struct net_device *orig_dev)
{
- struct switchdev_obj_port_mdb *mdb;
+ struct switchdev_obj_port_mdb mdb = {
+ .obj = {
+ .id = id,
+ .orig_dev = orig_dev,
+ },
+ };
+ struct switchdev_obj_port_mdb *pmdb;
- mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
- if (!mdb)
- return -ENOMEM;
+ br_switchdev_mdb_populate(&mdb, mp);
- mdb->obj.id = id;
- mdb->obj.orig_dev = orig_dev;
- br_switchdev_mdb_populate(mdb, mp);
- list_add_tail(&mdb->obj.list, mdb_list);
+ if (action == SWITCHDEV_PORT_OBJ_ADD &&
+ switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) {
+ /* This event is already in the deferred queue of
+ * events, so this replay must be elided, lest the
+ * driver receives duplicate events for it. This can
+ * only happen when replaying additions, since
+ * modifications are always immediately visible in
+ * br->mdb_list, whereas actual event delivery may be
+ * delayed.
+ */
+ return 0;
+ }
+
+ pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC);
+ if (!pmdb)
+ return -ENOMEM;
+ list_add_tail(&pmdb->obj.list, mdb_list);
return 0;
}
@@ -677,51 +696,50 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return 0;
- /* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
- * because the write-side protection is br->multicast_lock. But we
- * need to emulate the [ blocking ] calling context of a regular
- * switchdev event, so since both br->multicast_lock and RCU read side
- * critical sections are atomic, we have no choice but to pick the RCU
- * read side lock, queue up all our events, leave the critical section
- * and notify switchdev from blocking context.
+ if (adding)
+ action = SWITCHDEV_PORT_OBJ_ADD;
+ else
+ action = SWITCHDEV_PORT_OBJ_DEL;
+
+ /* br_switchdev_mdb_queue_one() will take care to not queue a
+ * replay of an event that is already pending in the switchdev
+ * deferred queue. In order to safely determine that, there
+ * must be no new deferred MDB notifications enqueued for the
+ * duration of the MDB scan. Therefore, grab the write-side
+ * lock to avoid racing with any concurrent IGMP/MLD snooping.
*/
- rcu_read_lock();
+ spin_lock_bh(&br->multicast_lock);
- hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
+ hlist_for_each_entry(mp, &br->mdb_list, mdb_node) {
struct net_bridge_port_group __rcu * const *pp;
const struct net_bridge_port_group *p;
if (mp->host_joined) {
- err = br_switchdev_mdb_queue_one(&mdb_list,
+ err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
SWITCHDEV_OBJ_ID_HOST_MDB,
mp, br_dev);
if (err) {
- rcu_read_unlock();
+ spin_unlock_bh(&br->multicast_lock);
goto out_free_mdb;
}
}
- for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+ for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (p->key.port->dev != dev)
continue;
- err = br_switchdev_mdb_queue_one(&mdb_list,
+ err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
SWITCHDEV_OBJ_ID_PORT_MDB,
mp, dev);
if (err) {
- rcu_read_unlock();
+ spin_unlock_bh(&br->multicast_lock);
goto out_free_mdb;
}
}
}
- rcu_read_unlock();
-
- if (adding)
- action = SWITCHDEV_PORT_OBJ_ADD;
- else
- action = SWITCHDEV_PORT_OBJ_DEL;
+ spin_unlock_bh(&br->multicast_lock);
list_for_each_entry(obj, &mdb_list, list) {
err = br_switchdev_mdb_replay_one(nb, dev,
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 5b045284849e..c9189a970eec 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -19,6 +19,35 @@
#include <linux/rtnetlink.h>
#include <net/switchdev.h>
+static bool switchdev_obj_eq(const struct switchdev_obj *a,
+ const struct switchdev_obj *b)
+{
+ const struct switchdev_obj_port_vlan *va, *vb;
+ const struct switchdev_obj_port_mdb *ma, *mb;
+
+ if (a->id != b->id || a->orig_dev != b->orig_dev)
+ return false;
+
+ switch (a->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ va = SWITCHDEV_OBJ_PORT_VLAN(a);
+ vb = SWITCHDEV_OBJ_PORT_VLAN(b);
+ return va->flags == vb->flags &&
+ va->vid == vb->vid &&
+ va->changed == vb->changed;
+ case SWITCHDEV_OBJ_ID_PORT_MDB:
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ ma = SWITCHDEV_OBJ_PORT_MDB(a);
+ mb = SWITCHDEV_OBJ_PORT_MDB(b);
+ return ma->vid == mb->vid &&
+ ether_addr_equal(ma->addr, mb->addr);
+ default:
+ break;
+ }
+
+ BUG();
+}
+
static LIST_HEAD(deferred);
static DEFINE_SPINLOCK(deferred_lock);
@@ -307,6 +336,50 @@ int switchdev_port_obj_del(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
+/**
+ * switchdev_port_obj_act_is_deferred - Is object action pending?
+ *
+ * @dev: port device
+ * @nt: type of action; add or delete
+ * @obj: object to test
+ *
+ * Returns true if a deferred item is pending, which is
+ * equivalent to the action @nt on an object @obj.
+ *
+ * rtnl_lock must be held.
+ */
+bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
+ enum switchdev_notifier_type nt,
+ const struct switchdev_obj *obj)
+{
+ struct switchdev_deferred_item *dfitem;
+ bool found = false;
+
+ ASSERT_RTNL();
+
+ spin_lock_bh(&deferred_lock);
+
+ list_for_each_entry(dfitem, &deferred, list) {
+ if (dfitem->dev != dev)
+ continue;
+
+ if ((dfitem->func == switchdev_port_obj_add_deferred &&
+ nt == SWITCHDEV_PORT_OBJ_ADD) ||
+ (dfitem->func == switchdev_port_obj_del_deferred &&
+ nt == SWITCHDEV_PORT_OBJ_DEL)) {
+ if (switchdev_obj_eq((const void *)dfitem->data, obj)) {
+ found = true;
+ break;
+ }
+ }
+ }
+
+ spin_unlock_bh(&deferred_lock);
+
+ return found;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred);
+
static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);