summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2021-10-24 13:47:45 +0100
committer David S. Miller <davem@davemloft.net> 2021-10-24 13:47:45 +0100
commit 965e6b262f48257dbdb51b565ecfd84877a0ab5f (patch)
tree 06380d14ee187dd4154682bc8950ae211c162418
parent 4d98bb0d7ec2d0b417df6207b0bafe1868bad9f8 (diff)
parent edc90d15850c4812a6c72b004c9b81c37755b997 (diff)
Merge branch 'dsa-rtnl'
Vladimir Oltean says:

====================
Drop rtnl_lock from DSA .port_fdb_{add,del}

As mentioned in the RFC posted 2 months ago:
https://patchwork.kernel.org/project/netdevbpf/cover/20210824114049.3814660-1-vladimir.oltean@nxp.com/

DSA is transitioning to a driver API where the rtnl_lock is not held when calling ds->ops->port_fdb_add() and ds->ops->port_fdb_del(). Drivers cannot take that lock privately from those callbacks either.

This change is required so that DSA can wait for switchdev FDB work items to finish before leaving the bridge. That change will be made in a future patch series.

A small selftest is provided with the patch set in the hope that concurrency issues uncovered by this series, but not spotted by me by code inspection, will be caught.

A status of the existing drivers:

- mv88e6xxx_port_fdb_add() and mv88e6xxx_port_fdb_del() take mv88e6xxx_reg_lock() so they should be safe.
- qca8k_fdb_add() and qca8k_fdb_del() take mutex_lock(&priv->reg_mutex) so they should be safe.
- hellcreek_fdb_add() and hellcreek_fdb_del() take mutex_lock(&hellcreek->reg_lock) so they should be safe.
- ksz9477_port_fdb_add() and ksz9477_port_fdb_del() take mutex_lock(&dev->alu_mutex) so they should be safe.
- b53_fdb_add() and b53_fdb_del() did not have locking, so I've added a scheme based on my own judgement there (not tested).
- felix_fdb_add() and felix_fdb_del() did not have locking, I've added and tested a locking scheme there.
- mt7530_port_fdb_add() and mt7530_port_fdb_del() take mutex_lock(&priv->reg_mutex), so they should be safe.
- gswip_port_fdb() did not have locking, so I've added a non-expert locking scheme based on my own judgement (not tested).
- lan9303_alr_add_port() and lan9303_alr_del_port() take mutex_lock(&chip->alr_mutex) so they should be safe.
- sja1105_fdb_add() and sja1105_fdb_del() did not have locking, I've added and tested a locking scheme.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- MAINTAINERS 1
-rw-r--r-- drivers/net/dsa/b53/b53_common.c 40
-rw-r--r-- drivers/net/dsa/b53/b53_priv.h 1
-rw-r--r-- drivers/net/dsa/lantiq_gswip.c 28
-rw-r--r-- drivers/net/dsa/sja1105/sja1105.h 2
-rw-r--r-- drivers/net/dsa/sja1105/sja1105_dynamic_config.c 91
-rw-r--r-- drivers/net/dsa/sja1105/sja1105_main.c 1
-rw-r--r-- drivers/net/ethernet/mscc/ocelot.c 53
-rw-r--r-- include/net/dsa.h 1
-rw-r--r-- include/soc/mscc/ocelot.h 3
-rw-r--r-- net/dsa/dsa2.c 1
-rw-r--r-- net/dsa/slave.c 2
-rw-r--r-- net/dsa/switch.c 76
-rwxr-xr-x tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh 47
-rw-r--r-- tools/testing/selftests/net/forwarding/lib.sh 10
15 files changed, 283 insertions, 74 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index c5aa142d4b3a..975086c5345d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13056,6 +13056,7 @@ F: include/linux/dsa/
F: include/linux/platform_data/dsa.h
F: include/net/dsa.h
F: net/dsa/
+F: tools/testing/selftests/drivers/net/dsa/
NETWORKING [GENERAL]
M: "David S. Miller" <davem@davemloft.net>
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 06279ba64cc8..9c80ca17b155 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1546,6 +1546,7 @@ EXPORT_SYMBOL(b53_vlan_del);
/* Address Resolution Logic routines */
static int b53_arl_op_wait(struct b53_device *dev)
+ __must_hold(&dev->arl_mutex)
{
unsigned int timeout = 10;
u8 reg;
@@ -1564,6 +1565,7 @@ static int b53_arl_op_wait(struct b53_device *dev)
}
static int b53_arl_rw_op(struct b53_device *dev, unsigned int op)
+ __must_hold(&dev->arl_mutex)
{
u8 reg;
@@ -1587,6 +1589,7 @@ static int b53_arl_rw_op(struct b53_device *dev, unsigned int op)
static int b53_arl_read(struct b53_device *dev, u64 mac,
u16 vid, struct b53_arl_entry *ent, u8 *idx)
+ __must_hold(&dev->arl_mutex)
{
DECLARE_BITMAP(free_bins, B53_ARLTBL_MAX_BIN_ENTRIES);
unsigned int i;
@@ -1632,6 +1635,7 @@ static int b53_arl_read(struct b53_device *dev, u64 mac,
static int b53_arl_op(struct b53_device *dev, int op, int port,
const unsigned char *addr, u16 vid, bool is_valid)
+ __must_hold(&dev->arl_mutex)
{
struct b53_arl_entry ent;
u32 fwd_entry;
@@ -1709,6 +1713,7 @@ int b53_fdb_add(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid)
{
struct b53_device *priv = ds->priv;
+ int ret;
/* 5325 and 5365 require some more massaging, but could
* be supported eventually
@@ -1716,7 +1721,11 @@ int b53_fdb_add(struct dsa_switch *ds, int port,
if (is5325(priv) || is5365(priv))
return -EOPNOTSUPP;
- return b53_arl_op(priv, 0, port, addr, vid, true);
+ mutex_lock(&priv->arl_mutex);
+ ret = b53_arl_op(priv, 0, port, addr, vid, true);
+ mutex_unlock(&priv->arl_mutex);
+
+ return ret;
}
EXPORT_SYMBOL(b53_fdb_add);
@@ -1724,12 +1733,18 @@ int b53_fdb_del(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid)
{
struct b53_device *priv = ds->priv;
+ int ret;
- return b53_arl_op(priv, 0, port, addr, vid, false);
+ mutex_lock(&priv->arl_mutex);
+ ret = b53_arl_op(priv, 0, port, addr, vid, false);
+ mutex_unlock(&priv->arl_mutex);
+
+ return ret;
}
EXPORT_SYMBOL(b53_fdb_del);
static int b53_arl_search_wait(struct b53_device *dev)
+ __must_hold(&dev->arl_mutex)
{
unsigned int timeout = 1000;
u8 reg;
@@ -1750,6 +1765,7 @@ static int b53_arl_search_wait(struct b53_device *dev)
static void b53_arl_search_rd(struct b53_device *dev, u8 idx,
struct b53_arl_entry *ent)
+ __must_hold(&dev->arl_mutex)
{
u64 mac_vid;
u32 fwd_entry;
@@ -1782,6 +1798,8 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
int ret;
u8 reg;
+ mutex_lock(&priv->arl_mutex);
+
/* Start search operation */
reg = ARL_SRCH_STDN;
b53_write8(priv, B53_ARLIO_PAGE, B53_ARL_SRCH_CTL, reg);
@@ -1789,18 +1807,18 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
do {
ret = b53_arl_search_wait(priv);
if (ret)
- return ret;
+ break;
b53_arl_search_rd(priv, 0, &results[0]);
ret = b53_fdb_copy(port, &results[0], cb, data);
if (ret)
- return ret;
+ break;
if (priv->num_arl_bins > 2) {
b53_arl_search_rd(priv, 1, &results[1]);
ret = b53_fdb_copy(port, &results[1], cb, data);
if (ret)
- return ret;
+ break;
if (!results[0].is_valid && !results[1].is_valid)
break;
@@ -1808,6 +1826,8 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
} while (count++ < b53_max_arl_entries(priv) / 2);
+ mutex_unlock(&priv->arl_mutex);
+
return 0;
}
EXPORT_SYMBOL(b53_fdb_dump);
@@ -1816,6 +1836,7 @@ int b53_mdb_add(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_mdb *mdb)
{
struct b53_device *priv = ds->priv;
+ int ret;
/* 5325 and 5365 require some more massaging, but could
* be supported eventually
@@ -1823,7 +1844,11 @@ int b53_mdb_add(struct dsa_switch *ds, int port,
if (is5325(priv) || is5365(priv))
return -EOPNOTSUPP;
- return b53_arl_op(priv, 0, port, mdb->addr, mdb->vid, true);
+ mutex_lock(&priv->arl_mutex);
+ ret = b53_arl_op(priv, 0, port, mdb->addr, mdb->vid, true);
+ mutex_unlock(&priv->arl_mutex);
+
+ return ret;
}
EXPORT_SYMBOL(b53_mdb_add);
@@ -1833,7 +1858,9 @@ int b53_mdb_del(struct dsa_switch *ds, int port,
struct b53_device *priv = ds->priv;
int ret;
+ mutex_lock(&priv->arl_mutex);
ret = b53_arl_op(priv, 0, port, mdb->addr, mdb->vid, false);
+ mutex_unlock(&priv->arl_mutex);
if (ret)
dev_err(ds->dev, "failed to delete MDB entry\n");
@@ -2670,6 +2697,7 @@ struct b53_device *b53_switch_alloc(struct device *base,
mutex_init(&dev->reg_mutex);
mutex_init(&dev->stats_mutex);
+ mutex_init(&dev->arl_mutex);
return dev;
}
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 544101e74bca..579da74ada64 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -107,6 +107,7 @@ struct b53_device {
struct mutex reg_mutex;
struct mutex stats_mutex;
+ struct mutex arl_mutex;
const struct b53_io_ops *ops;
/* chip specific data */
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index dbd4486a173f..1a96df70d1e8 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -276,6 +276,7 @@ struct gswip_priv {
int num_gphy_fw;
struct gswip_gphy_fw *gphy_fw;
u32 port_vlan_filter;
+ struct mutex pce_table_lock;
};
struct gswip_pce_table_entry {
@@ -523,10 +524,14 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv,
u16 addr_mode = tbl->key_mode ? GSWIP_PCE_TBL_CTRL_OPMOD_KSRD :
GSWIP_PCE_TBL_CTRL_OPMOD_ADRD;
+ mutex_lock(&priv->pce_table_lock);
+
err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
GSWIP_PCE_TBL_CTRL_BAS);
- if (err)
+ if (err) {
+ mutex_unlock(&priv->pce_table_lock);
return err;
+ }
gswip_switch_w(priv, tbl->index, GSWIP_PCE_TBL_ADDR);
gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK |
@@ -536,8 +541,10 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv,
err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
GSWIP_PCE_TBL_CTRL_BAS);
- if (err)
+ if (err) {
+ mutex_unlock(&priv->pce_table_lock);
return err;
+ }
for (i = 0; i < ARRAY_SIZE(tbl->key); i++)
tbl->key[i] = gswip_switch_r(priv, GSWIP_PCE_TBL_KEY(i));
@@ -553,6 +560,8 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv,
tbl->valid = !!(crtl & GSWIP_PCE_TBL_CTRL_VLD);
tbl->gmap = (crtl & GSWIP_PCE_TBL_CTRL_GMAP_MASK) >> 7;
+ mutex_unlock(&priv->pce_table_lock);
+
return 0;
}
@@ -565,10 +574,14 @@ static int gswip_pce_table_entry_write(struct gswip_priv *priv,
u16 addr_mode = tbl->key_mode ? GSWIP_PCE_TBL_CTRL_OPMOD_KSWR :
GSWIP_PCE_TBL_CTRL_OPMOD_ADWR;
+ mutex_lock(&priv->pce_table_lock);
+
err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
GSWIP_PCE_TBL_CTRL_BAS);
- if (err)
+ if (err) {
+ mutex_unlock(&priv->pce_table_lock);
return err;
+ }
gswip_switch_w(priv, tbl->index, GSWIP_PCE_TBL_ADDR);
gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK |
@@ -600,8 +613,12 @@ static int gswip_pce_table_entry_write(struct gswip_priv *priv,
crtl |= GSWIP_PCE_TBL_CTRL_BAS;
gswip_switch_w(priv, crtl, GSWIP_PCE_TBL_CTRL);
- return gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
- GSWIP_PCE_TBL_CTRL_BAS);
+ err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
+ GSWIP_PCE_TBL_CTRL_BAS);
+
+ mutex_unlock(&priv->pce_table_lock);
+
+ return err;
}
/* Add the LAN port into a bridge with the CPU port by
@@ -2106,6 +2123,7 @@ static int gswip_probe(struct platform_device *pdev)
priv->ds->priv = priv;
priv->ds->ops = priv->hw_info->ops;
priv->dev = dev;
+ mutex_init(&priv->pce_table_lock);
version = gswip_switch_r(priv, GSWIP_VERSION);
np = dev->of_node;
diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 808419f3b808..21dba16af097 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -261,6 +261,8 @@ struct sja1105_private {
* the switch doesn't confuse them with one another.
*/
struct mutex mgmt_lock;
+ /* Serializes access to the dynamic config interface */
+ struct mutex dynamic_config_lock;
struct devlink_region **regions;
struct sja1105_cbs_entry *cbs;
struct mii_bus *mdio_base_t1;
diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index f2049f52833c..7729d3f8b7f5 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -1170,6 +1170,56 @@ const struct sja1105_dynamic_table_ops sja1110_dyn_ops[BLK_IDX_MAX_DYN] = {
},
};
+#define SJA1105_DYNAMIC_CONFIG_SLEEP_US 10
+#define SJA1105_DYNAMIC_CONFIG_TIMEOUT_US 100000
+
+static int
+sja1105_dynamic_config_poll_valid(struct sja1105_private *priv,
+ struct sja1105_dyn_cmd *cmd,
+ const struct sja1105_dynamic_table_ops *ops)
+{
+ u8 packed_buf[SJA1105_MAX_DYN_CMD_SIZE] = {};
+ int rc;
+
+ /* We don't _need_ to read the full entry, just the command area which
+ * is a fixed SJA1105_SIZE_DYN_CMD. But our cmd_packing() API expects a
+ * buffer that contains the full entry too. Additionally, our API
+ * doesn't really know how many bytes into the buffer does the command
+ * area really begin. So just read back the whole entry.
+ */
+ rc = sja1105_xfer_buf(priv, SPI_READ, ops->addr, packed_buf,
+ ops->packed_size);
+ if (rc)
+ return rc;
+
+ /* Unpack the command structure, and return it to the caller in case it
+ * needs to perform further checks on it (VALIDENT).
+ */
+ memset(cmd, 0, sizeof(*cmd));
+ ops->cmd_packing(packed_buf, cmd, UNPACK);
+
+ /* Hardware hasn't cleared VALID => still working on it */
+ return cmd->valid ? -EAGAIN : 0;
+}
+
+/* Poll the dynamic config entry's control area until the hardware has
+ * cleared the VALID bit, which means we have confirmation that it has
+ * finished processing the command.
+ */
+static int
+sja1105_dynamic_config_wait_complete(struct sja1105_private *priv,
+ struct sja1105_dyn_cmd *cmd,
+ const struct sja1105_dynamic_table_ops *ops)
+{
+ int rc;
+
+ return read_poll_timeout(sja1105_dynamic_config_poll_valid,
+ rc, rc != -EAGAIN,
+ SJA1105_DYNAMIC_CONFIG_SLEEP_US,
+ SJA1105_DYNAMIC_CONFIG_TIMEOUT_US,
+ false, priv, cmd, ops);
+}
+
/* Provides read access to the settings through the dynamic interface
* of the switch.
* @blk_idx is used as key to select from the sja1105_dynamic_table_ops.
@@ -1196,7 +1246,6 @@ int sja1105_dynamic_config_read(struct sja1105_private *priv,
struct sja1105_dyn_cmd cmd = {0};
/* SPI payload buffer */
u8 packed_buf[SJA1105_MAX_DYN_CMD_SIZE] = {0};
- int retries = 3;
int rc;
if (blk_idx >= BLK_IDX_MAX_DYN)
@@ -1234,33 +1283,21 @@ int sja1105_dynamic_config_read(struct sja1105_private *priv,
ops->entry_packing(packed_buf, entry, PACK);
/* Send SPI write operation: read config table entry */
+ mutex_lock(&priv->dynamic_config_lock);
rc = sja1105_xfer_buf(priv, SPI_WRITE, ops->addr, packed_buf,
ops->packed_size);
- if (rc < 0)
+ if (rc < 0) {
+ mutex_unlock(&priv->dynamic_config_lock);
return rc;
+ }
- /* Loop until we have confirmation that hardware has finished
- * processing the command and has cleared the VALID field
- */
- do {
- memset(packed_buf, 0, ops->packed_size);
-
- /* Retrieve the read operation's result */
- rc = sja1105_xfer_buf(priv, SPI_READ, ops->addr, packed_buf,
- ops->packed_size);
- if (rc < 0)
- return rc;
-
- cmd = (struct sja1105_dyn_cmd) {0};
- ops->cmd_packing(packed_buf, &cmd, UNPACK);
-
- if (!cmd.valident && !(ops->access & OP_VALID_ANYWAY))
- return -ENOENT;
- cpu_relax();
- } while (cmd.valid && --retries);
+ rc = sja1105_dynamic_config_wait_complete(priv, &cmd, ops);
+ mutex_unlock(&priv->dynamic_config_lock);
+ if (rc < 0)
+ return rc;
- if (cmd.valid)
- return -ETIMEDOUT;
+ if (!cmd.valident && !(ops->access & OP_VALID_ANYWAY))
+ return -ENOENT;
/* Don't dereference possibly NULL pointer - maybe caller
* only wanted to see whether the entry existed or not.
@@ -1316,8 +1353,16 @@ int sja1105_dynamic_config_write(struct sja1105_private *priv,
ops->entry_packing(packed_buf, entry, PACK);
/* Send SPI write operation: read config table entry */
+ mutex_lock(&priv->dynamic_config_lock);
rc = sja1105_xfer_buf(priv, SPI_WRITE, ops->addr, packed_buf,
ops->packed_size);
+ if (rc < 0) {
+ mutex_unlock(&priv->dynamic_config_lock);
+ return rc;
+ }
+
+ rc = sja1105_dynamic_config_wait_complete(priv, &cmd, ops);
+ mutex_unlock(&priv->dynamic_config_lock);
if (rc < 0)
return rc;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 70ece441b3b8..d6788a010024 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3366,6 +3366,7 @@ static int sja1105_probe(struct spi_device *spi)
priv->ds = ds;
mutex_init(&priv->ptp_data.lock);
+ mutex_init(&priv->dynamic_config_lock);
mutex_init(&priv->mgmt_lock);
rc = sja1105_parse_dt(priv);
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 4e5ae687d2e2..33a4a9a17436 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -21,11 +21,13 @@ struct ocelot_mact_entry {
};
static inline u32 ocelot_mact_read_macaccess(struct ocelot *ocelot)
+ __must_hold(&ocelot->mact_lock)
{
return ocelot_read(ocelot, ANA_TABLES_MACACCESS);
}
static inline int ocelot_mact_wait_for_completion(struct ocelot *ocelot)
+ __must_hold(&ocelot->mact_lock)
{
u32 val;
@@ -39,6 +41,7 @@ static inline int ocelot_mact_wait_for_completion(struct ocelot *ocelot)
static void ocelot_mact_select(struct ocelot *ocelot,
const unsigned char mac[ETH_ALEN],
unsigned int vid)
+ __must_hold(&ocelot->mact_lock)
{
u32 macl = 0, mach = 0;
@@ -67,6 +70,7 @@ int ocelot_mact_learn(struct ocelot *ocelot, int port,
ANA_TABLES_MACACCESS_ENTRYTYPE(type) |
ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_LEARN);
unsigned int mc_ports;
+ int err;
/* Set MAC_CPU_COPY if the CPU port is used by a multicast entry */
if (type == ENTRYTYPE_MACv4)
@@ -79,18 +83,28 @@ int ocelot_mact_learn(struct ocelot *ocelot, int port,
if (mc_ports & BIT(ocelot->num_phys_ports))
cmd |= ANA_TABLES_MACACCESS_MAC_CPU_COPY;
+ mutex_lock(&ocelot->mact_lock);
+
ocelot_mact_select(ocelot, mac, vid);
/* Issue a write command */
ocelot_write(ocelot, cmd, ANA_TABLES_MACACCESS);
- return ocelot_mact_wait_for_completion(ocelot);
+ err = ocelot_mact_wait_for_completion(ocelot);
+
+ mutex_unlock(&ocelot->mact_lock);
+
+ return err;
}
EXPORT_SYMBOL(ocelot_mact_learn);
int ocelot_mact_forget(struct ocelot *ocelot,
const unsigned char mac[ETH_ALEN], unsigned int vid)
{
+ int err;
+
+ mutex_lock(&ocelot->mact_lock);
+
ocelot_mact_select(ocelot, mac, vid);
/* Issue a forget command */
@@ -98,7 +112,11 @@ int ocelot_mact_forget(struct ocelot *ocelot,
ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_FORGET),
ANA_TABLES_MACACCESS);
- return ocelot_mact_wait_for_completion(ocelot);
+ err = ocelot_mact_wait_for_completion(ocelot);
+
+ mutex_unlock(&ocelot->mact_lock);
+
+ return err;
}
EXPORT_SYMBOL(ocelot_mact_forget);
@@ -114,7 +132,9 @@ static void ocelot_mact_init(struct ocelot *ocelot)
| ANA_AGENCTRL_LEARN_IGNORE_VLAN,
ANA_AGENCTRL);
- /* Clear the MAC table */
+ /* Clear the MAC table. We are not concurrent with anyone, so
+ * holding &ocelot->mact_lock is pointless.
+ */
ocelot_write(ocelot, MACACCESS_CMD_INIT, ANA_TABLES_MACACCESS);
}
@@ -1172,6 +1192,7 @@ EXPORT_SYMBOL(ocelot_port_fdb_do_dump);
static int ocelot_mact_read(struct ocelot *ocelot, int port, int row, int col,
struct ocelot_mact_entry *entry)
+ __must_hold(&ocelot->mact_lock)
{
u32 val, dst, macl, mach;
char mac[ETH_ALEN];
@@ -1220,33 +1241,40 @@ static int ocelot_mact_read(struct ocelot *ocelot, int port, int row, int col,
int ocelot_fdb_dump(struct ocelot *ocelot, int port,
dsa_fdb_dump_cb_t *cb, void *data)
{
+ int err = 0;
int i, j;
+ /* We could take the lock just around ocelot_mact_read, but doing so
+ * thousands of times in a row seems rather pointless and inefficient.
+ */
+ mutex_lock(&ocelot->mact_lock);
+
/* Loop through all the mac tables entries. */
for (i = 0; i < ocelot->num_mact_rows; i++) {
for (j = 0; j < 4; j++) {
struct ocelot_mact_entry entry;
bool is_static;
- int ret;
- ret = ocelot_mact_read(ocelot, port, i, j, &entry);
+ err = ocelot_mact_read(ocelot, port, i, j, &entry);
/* If the entry is invalid (wrong port, invalid...),
* skip it.
*/
- if (ret == -EINVAL)
+ if (err == -EINVAL)
continue;
- else if (ret)
- return ret;
+ else if (err)
+ break;
is_static = (entry.type == ENTRYTYPE_LOCKED);
- ret = cb(entry.mac, entry.vid, is_static, data);
- if (ret)
- return ret;
+ err = cb(entry.mac, entry.vid, is_static, data);
+ if (err)
+ break;
}
}
- return 0;
+ mutex_unlock(&ocelot->mact_lock);
+
+ return err;
}
EXPORT_SYMBOL(ocelot_fdb_dump);
@@ -2231,6 +2259,7 @@ int ocelot_init(struct ocelot *ocelot)
mutex_init(&ocelot->stats_lock);
mutex_init(&ocelot->ptp_lock);
+ mutex_init(&ocelot->mact_lock);
spin_lock_init(&ocelot->ptp_clock_lock);
spin_lock_init(&ocelot->ts_id_lock);
snprintf(queue_name, sizeof(queue_name), "%s-stats",
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 1cd9c2461f0d..badd214f7470 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -287,6 +287,7 @@ struct dsa_port {
/* List of MAC addresses that must be forwarded on this port.
* These are only valid on CPU ports and DSA links.
*/
+ struct mutex addr_lists_lock;
struct list_head fdbs;
struct list_head mdbs;
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 9b872da0c246..fef3a36b0210 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -675,6 +675,9 @@ struct ocelot {
struct delayed_work stats_work;
struct workqueue_struct *stats_queue;
+ /* Lock for serializing access to the MAC table */
+ struct mutex mact_lock;
+
struct workqueue_struct *owq;
u8 ptp:1;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index f5270114dcb8..826957b6442b 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -433,6 +433,7 @@ static int dsa_port_setup(struct dsa_port *dp)
if (dp->setup)
return 0;
+ mutex_init(&dp->addr_lists_lock);
INIT_LIST_HEAD(&dp->fdbs);
INIT_LIST_HEAD(&dp->mdbs);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9d9fef668eba..adcfb2cb4e61 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2413,7 +2413,6 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
dp = dsa_to_port(ds, switchdev_work->port);
- rtnl_lock();
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
if (switchdev_work->host_addr)
@@ -2448,7 +2447,6 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
break;
}
- rtnl_unlock();
dev_put(switchdev_work->dev);
kfree(switchdev_work);
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 2b1b21bde830..6871e5f9b597 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -215,26 +215,30 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
int port = dp->index;
- int err;
+ int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_mdb_add(ds, port, mdb);
+ mutex_lock(&dp->addr_lists_lock);
+
a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
if (a) {
refcount_inc(&a->refcount);
- return 0;
+ goto out;
}
a = kzalloc(sizeof(*a), GFP_KERNEL);
- if (!a)
- return -ENOMEM;
+ if (!a) {
+ err = -ENOMEM;
+ goto out;
+ }
err = ds->ops->port_mdb_add(ds, port, mdb);
if (err) {
kfree(a);
- return err;
+ goto out;
}
ether_addr_copy(a->addr, mdb->addr);
@@ -242,7 +246,10 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &dp->mdbs);
- return 0;
+out:
+ mutex_unlock(&dp->addr_lists_lock);
+
+ return err;
}
static int dsa_port_do_mdb_del(struct dsa_port *dp,
@@ -251,29 +258,36 @@ static int dsa_port_do_mdb_del(struct dsa_port *dp,
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
int port = dp->index;
- int err;
+ int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_mdb_del(ds, port, mdb);
+ mutex_lock(&dp->addr_lists_lock);
+
a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
- if (!a)
- return -ENOENT;
+ if (!a) {
+ err = -ENOENT;
+ goto out;
+ }
if (!refcount_dec_and_test(&a->refcount))
- return 0;
+ goto out;
err = ds->ops->port_mdb_del(ds, port, mdb);
if (err) {
refcount_inc(&a->refcount);
- return err;
+ goto out;
}
list_del(&a->list);
kfree(a);
- return 0;
+out:
+ mutex_unlock(&dp->addr_lists_lock);
+
+ return err;
}
static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
@@ -282,26 +296,30 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
int port = dp->index;
- int err;
+ int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_fdb_add(ds, port, addr, vid);
+ mutex_lock(&dp->addr_lists_lock);
+
a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
if (a) {
refcount_inc(&a->refcount);
- return 0;
+ goto out;
}
a = kzalloc(sizeof(*a), GFP_KERNEL);
- if (!a)
- return -ENOMEM;
+ if (!a) {
+ err = -ENOMEM;
+ goto out;
+ }
err = ds->ops->port_fdb_add(ds, port, addr, vid);
if (err) {
kfree(a);
- return err;
+ goto out;
}
ether_addr_copy(a->addr, addr);
@@ -309,7 +327,10 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &dp->fdbs);
- return 0;
+out:
+ mutex_unlock(&dp->addr_lists_lock);
+
+ return err;
}
static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
@@ -318,29 +339,36 @@ static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
int port = dp->index;
- int err;
+ int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_fdb_del(ds, port, addr, vid);
+ mutex_lock(&dp->addr_lists_lock);
+
a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
- if (!a)
- return -ENOENT;
+ if (!a) {
+ err = -ENOENT;
+ goto out;
+ }
if (!refcount_dec_and_test(&a->refcount))
- return 0;
+ goto out;
err = ds->ops->port_fdb_del(ds, port, addr, vid);
if (err) {
refcount_inc(&a->refcount);
- return err;
+ goto out;
}
list_del(&a->list);
kfree(a);
- return 0;
+out:
+ mutex_unlock(&dp->addr_lists_lock);
+
+ return err;
}
static int dsa_switch_host_fdb_add(struct dsa_switch *ds,
diff --git a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
new file mode 100755
index 000000000000..dca8be6092b9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Bridge FDB entries can be offloaded to DSA switches without holding the
+# rtnl_mutex. Traditionally this mutex has conferred drivers implicit
+# serialization, which means their code paths are not well tested in the
+# presence of concurrency.
+# This test creates a background task that stresses the FDB by adding and
+# deleting an entry many times in a row without the rtnl_mutex held.
+# It then tests the driver resistance to concurrency by calling .ndo_fdb_dump
+# (with rtnl_mutex held) from a foreground task.
+# Since either the FDB dump or the additions/removals can fail, but the
+# additions and removals are performed in deferred as opposed to process
+# context, we cannot simply check for user space error codes.
+
+WAIT_TIME=1
+NUM_NETIFS=1
+REQUIRE_JQ="no"
+REQUIRE_MZ="no"
+NETIF_CREATE="no"
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/lib.sh
+
+cleanup() {
+ echo "Cleaning up"
+ kill $pid && wait $pid &> /dev/null
+ ip link del br0
+ echo "Please check kernel log for errors"
+}
+trap 'cleanup' EXIT
+
+eth=${NETIFS[p1]}
+
+ip link del br0 2&>1 >/dev/null || :
+ip link add br0 type bridge && ip link set $eth master br0
+
+(while :; do
+ bridge fdb add 00:01:02:03:04:05 dev $eth master static
+ bridge fdb del 00:01:02:03:04:05 dev $eth master static
+done) &
+pid=$!
+
+for i in $(seq 1 50); do
+ bridge fdb show > /dev/null
+ sleep 3
+ echo "$((${i} * 2))% complete..."
+done
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 92087d423bcf..520d8b53464b 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -23,6 +23,8 @@ MC_CLI=${MC_CLI:=smcroutectl}
PING_TIMEOUT=${PING_TIMEOUT:=5}
WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
+REQUIRE_JQ=${REQUIRE_JQ:=yes}
+REQUIRE_MZ=${REQUIRE_MZ:=yes}
relative_path="${BASH_SOURCE%/*}"
if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -141,8 +143,12 @@ require_command()
fi
}
-require_command jq
-require_command $MZ
+if [[ "$REQUIRE_JQ" = "yes" ]]; then
+ require_command jq
+fi
+if [[ "$REQUIRE_MZ" = "yes" ]]; then
+ require_command $MZ
+fi
if [[ ! -v NUM_NETIFS ]]; then
echo "SKIP: importer does not define \"NUM_NETIFS\""