-rw-r--r--  drivers/infiniband/hw/mlx5/gsi.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 1
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 1
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/dev.c | 49
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 25
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c | 173
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 537
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h | 16
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 129
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h | 15
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c | 16
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c | 28
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 3
-rw-r--r--  include/linux/mlx5/driver.h | 5
22 files changed, 720 insertions, 302 deletions
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 3ad8f637c589..b804f2dd5628 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -100,7 +100,7 @@ int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
port_type) == MLX5_CAP_PORT_TYPE_IB)
num_qps = pd->device->attrs.max_pkeys;
else if (dev->lag_active)
- num_qps = MLX5_MAX_PORTS;
+ num_qps = dev->lag_ports;
}
gsi = &mqp->gsi;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 61aa196d6484..61a3b767262f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2991,6 +2991,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
}
dev->flow_db->lag_demux_ft = ft;
+ dev->lag_ports = mlx5_lag_get_num_ports(mdev);
dev->lag_active = true;
return 0;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 4f04bb55c4c6..8b3c83c0b70a 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1131,6 +1131,7 @@ struct mlx5_ib_dev {
struct xarray sig_mrs;
struct mlx5_port_caps port_caps[MLX5_MAX_PORTS];
u16 pkey_table_len;
+ u8 lag_ports;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 3f467557d34e..fb8669c02546 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3907,7 +3907,7 @@ static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev,
tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity;
return (unsigned int)atomic_add_return(1, tx_port_affinity) %
- MLX5_MAX_PORTS + 1;
+ (dev->lag_active ? dev->lag_ports : MLX5_CAP_GEN(dev->mdev, num_lag_ports)) + 1;
}
static bool qp_supports_affinity(struct mlx5_ib_qp *qp)
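For illustration, a minimal standalone sketch (not driver code; the helper name is made up) of the round-robin selection above: the shared counter is bumped atomically and reduced modulo the LAG port count, yielding a 1-based port number.

	static unsigned int rr_tx_port_sketch(atomic_t *counter, unsigned int num_ports)
	{
		/* atomic_add_return() returns the post-increment value */
		return (unsigned int)atomic_add_return(1, counter) % num_ports + 1;
	}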
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 81620c25c77e..7895ed7cc285 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
- fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \
+ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
fw_reset.o qos.o lib/tout.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index ba6dad97e308..11f7c03ae81b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -555,12 +555,9 @@ static u32 mlx5_gen_pci_id(const struct mlx5_core_dev *dev)
PCI_SLOT(dev->pdev->devfn));
}
-static int next_phys_dev(struct device *dev, const void *data)
+static int _next_phys_dev(struct mlx5_core_dev *mdev,
+ const struct mlx5_core_dev *curr)
{
- struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev);
- struct mlx5_core_dev *mdev = madev->mdev;
- const struct mlx5_core_dev *curr = data;
-
if (!mlx5_core_is_pf(mdev))
return 0;
@@ -574,8 +571,30 @@ static int next_phys_dev(struct device *dev, const void *data)
return 1;
}
-/* Must be called with intf_mutex held */
-struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
+static int next_phys_dev(struct device *dev, const void *data)
+{
+ struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev);
+ struct mlx5_core_dev *mdev = madev->mdev;
+
+ return _next_phys_dev(mdev, data);
+}
+
+static int next_phys_dev_lag(struct device *dev, const void *data)
+{
+ struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev);
+ struct mlx5_core_dev *mdev = madev->mdev;
+
+ if (!MLX5_CAP_GEN(mdev, vport_group_manager) ||
+ !MLX5_CAP_GEN(mdev, lag_master) ||
+ (MLX5_CAP_GEN(mdev, num_lag_ports) > MLX5_MAX_PORTS ||
+ MLX5_CAP_GEN(mdev, num_lag_ports) <= 1))
+ return 0;
+
+ return _next_phys_dev(mdev, data);
+}
+
+static struct mlx5_core_dev *mlx5_get_next_dev(struct mlx5_core_dev *dev,
+ int (*match)(struct device *dev, const void *data))
{
struct auxiliary_device *adev;
struct mlx5_adev *madev;
@@ -583,7 +602,7 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
if (!mlx5_core_is_pf(dev))
return NULL;
- adev = auxiliary_find_device(NULL, dev, &next_phys_dev);
+ adev = auxiliary_find_device(NULL, dev, match);
if (!adev)
return NULL;
@@ -592,6 +611,20 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
return madev->mdev;
}
+/* Must be called with intf_mutex held */
+struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
+{
+ lockdep_assert_held(&mlx5_intf_mutex);
+ return mlx5_get_next_dev(dev, &next_phys_dev);
+}
+
+/* Must be called with intf_mutex held */
+struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev)
+{
+ lockdep_assert_held(&mlx5_intf_mutex);
+ return mlx5_get_next_dev(dev, &next_phys_dev_lag);
+}
+
void mlx5_dev_list_lock(void)
{
mutex_lock(&mlx5_intf_mutex);
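next_phys_dev_lag() differs from next_phys_dev() only by a capability gate. As a hedged sketch (illustrative helper name, not in the patch), the gate boils down to this predicate:

	static bool lag_matchable_sketch(struct mlx5_core_dev *mdev)
	{
		u8 num_lag_ports = MLX5_CAP_GEN(mdev, num_lag_ports);

		/* must be a LAG-capable vport group manager with 2..MLX5_MAX_PORTS ports */
		return MLX5_CAP_GEN(mdev, vport_group_manager) &&
		       MLX5_CAP_GEN(mdev, lag_master) &&
		       num_lag_ports > 1 && num_lag_ports <= MLX5_MAX_PORTS;
	}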
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index e8789e6d7e7b..f85166e587f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -178,13 +178,13 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a
*actions_performed = BIT(action);
switch (action) {
case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
- return mlx5_load_one(dev);
+ return mlx5_load_one(dev, false);
case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
break;
/* On fw_activate action, also driver is reloaded and reinit performed */
*actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
- return mlx5_load_one(dev);
+ return mlx5_load_one(dev, false);
default:
/* Unsupported action should not get to this function */
WARN_ON(1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 25f2d2717aaa..719ef26d23c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1569,9 +1569,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
ida_init(&esw->offloads.vport_metadata_ida);
xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
mutex_init(&esw->state_lock);
- lockdep_register_key(&esw->mode_lock_key);
init_rwsem(&esw->mode_lock);
- lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key);
refcount_set(&esw->qos.refcnt, 0);
esw->enabled_vports = 0;
@@ -1615,7 +1613,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
WARN_ON(refcount_read(&esw->qos.refcnt));
- lockdep_unregister_key(&esw->mode_lock_key);
mutex_destroy(&esw->state_lock);
WARN_ON(!xa_empty(&esw->offloads.vhca_map));
xa_destroy(&esw->offloads.vhca_map);
@@ -1893,17 +1890,6 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
}
EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
-bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
-{
- if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
- dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) ||
- (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
- dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS))
- return true;
-
- return false;
-}
-
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
struct mlx5_core_dev *dev1)
{
@@ -2015,17 +2001,6 @@ void mlx5_esw_unlock(struct mlx5_eswitch *esw)
}
/**
- * mlx5_esw_lock() - Take write lock on esw mode lock
- * @esw: eswitch device.
- */
-void mlx5_esw_lock(struct mlx5_eswitch *esw)
-{
- if (!mlx5_esw_allowed(esw))
- return;
- down_write(&esw->mode_lock);
-}
-
-/**
* mlx5_eswitch_get_total_vports - Get total vports of the eswitch
*
* @dev: Pointer to core device
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index bac5160837c5..2754a732914d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -331,7 +331,6 @@ struct mlx5_eswitch {
u32 large_group_num;
} params;
struct blocking_notifier_head n_head;
- struct lock_class_key mode_lock_key;
};
void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -518,8 +517,6 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2);
}
-bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
- struct mlx5_core_dev *dev1);
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
struct mlx5_core_dev *dev1);
@@ -706,7 +703,6 @@ void mlx5_esw_get(struct mlx5_core_dev *dev);
void mlx5_esw_put(struct mlx5_core_dev *dev);
int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
void mlx5_esw_unlock(struct mlx5_eswitch *esw);
-void mlx5_esw_lock(struct mlx5_eswitch *esw);
void esw_vport_change_handle_locked(struct mlx5_vport *vport);
@@ -724,7 +720,6 @@ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; }
static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {}
-static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
static inline
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; }
@@ -733,9 +728,6 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
return ERR_PTR(-EOPNOTSUPP);
}
-static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; }
-static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; }
-
static inline struct mlx5_flow_handle *
esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index ca1aba845dd6..84df0d56a2b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -148,7 +148,7 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) {
complete(&fw_reset->done);
} else {
- mlx5_load_one(dev);
+ mlx5_load_one(dev, false);
devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0,
BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
new file mode 100644
index 000000000000..443daf6e3d4b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "lag.h"
+
+static char *get_str_mode_type(struct mlx5_lag *ldev)
+{
+ if (ldev->flags & MLX5_LAG_FLAG_ROCE)
+ return "roce";
+ if (ldev->flags & MLX5_LAG_FLAG_SRIOV)
+ return "switchdev";
+ if (ldev->flags & MLX5_LAG_FLAG_MULTIPATH)
+ return "multipath";
+
+ return NULL;
+}
+
+static int type_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ char *mode = NULL;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ if (__mlx5_lag_is_active(ldev))
+ mode = get_str_mode_type(ldev);
+ mutex_unlock(&ldev->lock);
+ if (!mode)
+ return -EINVAL;
+ seq_printf(file, "%s\n", mode);
+
+ return 0;
+}
+
+static int port_sel_mode_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ int ret = 0;
+ char *mode;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ if (__mlx5_lag_is_active(ldev))
+ mode = get_str_port_sel_mode(ldev->flags);
+ else
+ ret = -EINVAL;
+ mutex_unlock(&ldev->lock);
+ if (ret || !mode)
+ return ret;
+
+ seq_printf(file, "%s\n", mode);
+ return 0;
+}
+
+static int state_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ bool active;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ active = __mlx5_lag_is_active(ldev);
+ mutex_unlock(&ldev->lock);
+ seq_printf(file, "%s\n", active ? "active" : "disabled");
+ return 0;
+}
+
+static int flags_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ bool shared_fdb;
+ bool lag_active;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ lag_active = __mlx5_lag_is_active(ldev);
+ if (lag_active)
+ shared_fdb = ldev->shared_fdb;
+
+ mutex_unlock(&ldev->lock);
+ if (!lag_active)
+ return -EINVAL;
+
+ seq_printf(file, "%s:%s\n", "shared_fdb", shared_fdb ? "on" : "off");
+ return 0;
+}
+
+static int mapping_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ u8 ports[MLX5_MAX_PORTS] = {};
+ struct mlx5_lag *ldev;
+ bool hash = false;
+ bool lag_active;
+ int num_ports;
+ int i;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ lag_active = __mlx5_lag_is_active(ldev);
+ if (lag_active) {
+ if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED) {
+ mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports,
+ &num_ports);
+ hash = true;
+ } else {
+ for (i = 0; i < ldev->ports; i++)
+ ports[i] = ldev->v2p_map[i];
+ num_ports = ldev->ports;
+ }
+ }
+ mutex_unlock(&ldev->lock);
+ if (!lag_active)
+ return -EINVAL;
+
+ for (i = 0; i < num_ports; i++) {
+ if (hash)
+ seq_printf(file, "%d\n", ports[i] + 1);
+ else
+ seq_printf(file, "%d:%d\n", i + 1, ports[i]);
+ }
+
+ return 0;
+}
+
+static int members_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ int i;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ for (i = 0; i < ldev->ports; i++) {
+ if (!ldev->pf[i].dev)
+ continue;
+ seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device));
+ }
+ mutex_unlock(&ldev->lock);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(type);
+DEFINE_SHOW_ATTRIBUTE(port_sel_mode);
+DEFINE_SHOW_ATTRIBUTE(state);
+DEFINE_SHOW_ATTRIBUTE(flags);
+DEFINE_SHOW_ATTRIBUTE(mapping);
+DEFINE_SHOW_ATTRIBUTE(members);
+
+void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev)
+{
+ struct dentry *dbg;
+
+ dbg = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev));
+ dev->priv.dbg.lag_debugfs = dbg;
+
+ debugfs_create_file("type", 0444, dbg, dev, &type_fops);
+ debugfs_create_file("port_sel_mode", 0444, dbg, dev, &port_sel_mode_fops);
+ debugfs_create_file("state", 0444, dbg, dev, &state_fops);
+ debugfs_create_file("flags", 0444, dbg, dev, &flags_fops);
+ debugfs_create_file("mapping", 0444, dbg, dev, &mapping_fops);
+ debugfs_create_file("members", 0444, dbg, dev, &members_fops);
+}
+
+void mlx5_ldev_remove_debugfs(struct dentry *dbg)
+{
+ debugfs_remove_recursive(dbg);
+}
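The six files above are read-only and live in a "lag" directory under the device's debugfs root returned by mlx5_debugfs_get_dev_root(), so the exact path depends on the mlx5 debugfs layout and is an assumption here. Per the show handlers: "state" prints "active" or "disabled"; "mapping" prints one enabled port per line in hash mode and slot:port pairs in queue-affinity mode; "type", "port_sel_mode", "flags" and "mapping" return -EINVAL while LAG is inactive.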
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 6cad3b72c133..b6dd9043061f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -53,8 +53,7 @@ enum {
*/
static DEFINE_SPINLOCK(lag_lock);
-static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
- u8 remap_port2, bool shared_fdb, u8 flags)
+static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, bool shared_fdb, u8 flags)
{
u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
@@ -63,8 +62,8 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
if (!(flags & MLX5_LAG_FLAG_HASH_BASED)) {
- MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
- MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
} else {
MLX5_SET(lagc, lag_ctx, port_select_mode,
MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT);
@@ -73,8 +72,8 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
return mlx5_cmd_exec_in(dev, create_lag, in);
}
-static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
- u8 remap_port2)
+static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
+ u8 *ports)
{
u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
@@ -82,8 +81,8 @@ static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
MLX5_SET(modify_lag_in, in, field_select, 0x1);
- MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
- MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
return mlx5_cmd_exec_in(dev, modify_lag, in);
}
@@ -108,6 +107,75 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
+static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
+ u8 *ports, int *num_disabled)
+{
+ int i;
+
+ *num_disabled = 0;
+ for (i = 0; i < num_ports; i++) {
+ if (!tracker->netdev_state[i].tx_enabled ||
+ !tracker->netdev_state[i].link_up)
+ ports[(*num_disabled)++] = i;
+ }
+}
+
+void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
+ u8 *ports, int *num_enabled)
+{
+ int i;
+
+ *num_enabled = 0;
+ for (i = 0; i < num_ports; i++) {
+ if (tracker->netdev_state[i].tx_enabled &&
+ tracker->netdev_state[i].link_up)
+ ports[(*num_enabled)++] = i;
+ }
+
+ if (*num_enabled == 0)
+ mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
+}
+
+static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
+ struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ u8 flags)
+{
+ char buf[MLX5_MAX_PORTS * 10 + 1] = {};
+ u8 enabled_ports[MLX5_MAX_PORTS] = {};
+ int written = 0;
+ int num_enabled;
+ int idx;
+ int err;
+ int i;
+ int j;
+
+ if (flags & MLX5_LAG_FLAG_HASH_BASED) {
+ mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
+ &num_enabled);
+ for (i = 0; i < num_enabled; i++) {
+ err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
+ if (err != 3)
+ return;
+ written += err;
+ }
+ buf[written - 2] = 0;
+ mlx5_core_info(dev, "lag map active ports: %s\n", buf);
+ } else {
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ err = scnprintf(buf + written, 10,
+ " port %d:%d", i + 1, ldev->v2p_map[idx]);
+ if (err != 9)
+ return;
+ written += err;
+ }
+ }
+ mlx5_core_info(dev, "lag map:%s\n", buf);
+ }
+}
+
static int mlx5_lag_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);
@@ -121,6 +189,7 @@ static void mlx5_ldev_free(struct kref *ref)
mlx5_lag_mp_cleanup(ldev);
cancel_delayed_work_sync(&ldev->bond_work);
destroy_workqueue(ldev->wq);
+ mutex_destroy(&ldev->lock);
kfree(ldev);
}
@@ -150,6 +219,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
}
kref_init(&ldev->ref);
+ mutex_init(&ldev->lock);
INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
ldev->nb.notifier_call = mlx5_lag_netdev_event;
@@ -162,6 +232,8 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
if (err)
mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
err);
+ ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
+ ldev->buckets = 1;
return ldev;
}
@@ -171,7 +243,7 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
{
int i;
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < ldev->ports; i++)
if (ldev->pf[i].netdev == ndev)
return i;
@@ -188,39 +260,72 @@ static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}
+/* Create a mapping between steering slots and active ports.
+ * As we have ldev->buckets slots per port, first assume the native
+ * mapping should be used.
+ * If there are ports that are disabled, fill the relevant slots
+ * with a mapping that points to active ports.
+ */
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
- u8 *port1, u8 *port2)
+ u8 num_ports,
+ u8 buckets,
+ u8 *ports)
{
- bool p1en;
- bool p2en;
+ int disabled[MLX5_MAX_PORTS] = {};
+ int enabled[MLX5_MAX_PORTS] = {};
+ int disabled_ports_num = 0;
+ int enabled_ports_num = 0;
+ int idx;
+ u32 rand;
+ int i;
+ int j;
- p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
- tracker->netdev_state[MLX5_LAG_P1].link_up;
+ for (i = 0; i < num_ports; i++) {
+ if (tracker->netdev_state[i].tx_enabled &&
+ tracker->netdev_state[i].link_up)
+ enabled[enabled_ports_num++] = i;
+ else
+ disabled[disabled_ports_num++] = i;
+ }
- p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
- tracker->netdev_state[MLX5_LAG_P2].link_up;
+ /* Use native mapping by default where each port's buckets
+ * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
+ */
+ for (i = 0; i < num_ports; i++)
+ for (j = 0; j < buckets; j++) {
+ idx = i * buckets + j;
+ ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
+ }
- *port1 = MLX5_LAG_EGRESS_PORT_1;
- *port2 = MLX5_LAG_EGRESS_PORT_2;
- if ((!p1en && !p2en) || (p1en && p2en))
+ /* If all ports are disabled/enabled keep native mapping */
+ if (enabled_ports_num == num_ports ||
+ disabled_ports_num == num_ports)
return;
- if (p1en)
- *port2 = MLX5_LAG_EGRESS_PORT_1;
- else
- *port1 = MLX5_LAG_EGRESS_PORT_2;
+ /* Go over the disabled ports and for each assign a random active port */
+ for (i = 0; i < disabled_ports_num; i++) {
+ for (j = 0; j < buckets; j++) {
+ get_random_bytes(&rand, 4);
+ ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
+ }
+ }
}
static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
{
- return ldev->pf[MLX5_LAG_P1].has_drop || ldev->pf[MLX5_LAG_P2].has_drop;
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ if (ldev->pf[i].has_drop)
+ return true;
+ return false;
}
static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
int i;
- for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ for (i = 0; i < ldev->ports; i++) {
if (!ldev->pf[i].has_drop)
continue;
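A worked example of mlx5_infer_tx_affinity_mapping() above (illustrative numbers, not from the patch): with num_ports = 4, buckets = 2 and only port index 2 reported down by the tracker, the native map 1 1 2 2 3 3 4 4 keeps the slots of the enabled ports, while each of the two slots owned by the downed port is overwritten with a randomly drawn member of the enabled set {1, 2, 4}.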
@@ -233,12 +338,12 @@ static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
struct lag_tracker *tracker)
{
- struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
- struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
- struct mlx5_core_dev *inactive;
- u8 v2p_port1, v2p_port2;
- int inactive_idx;
+ u8 disabled_ports[MLX5_MAX_PORTS] = {};
+ struct mlx5_core_dev *dev;
+ int disabled_index;
+ int num_disabled;
int err;
+ int i;
/* First delete the current drop rule so there won't be any dropped
* packets
@@ -248,58 +353,60 @@ static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
if (!ldev->tracker.has_inactive)
return;
- mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, &v2p_port2);
+ mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);
- if (v2p_port1 == MLX5_LAG_EGRESS_PORT_1) {
- inactive = dev1;
- inactive_idx = MLX5_LAG_P2;
- } else {
- inactive = dev0;
- inactive_idx = MLX5_LAG_P1;
+ for (i = 0; i < num_disabled; i++) {
+ disabled_index = disabled_ports[i];
+ dev = ldev->pf[disabled_index].dev;
+ err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
+ MLX5_VPORT_UPLINK);
+ if (!err)
+ ldev->pf[disabled_index].has_drop = true;
+ else
+ mlx5_core_err(dev,
+ "Failed to create lag drop rule, error: %d", err);
}
-
- err = mlx5_esw_acl_ingress_vport_drop_rule_create(inactive->priv.eswitch,
- MLX5_VPORT_UPLINK);
- if (!err)
- ldev->pf[inactive_idx].has_drop = true;
- else
- mlx5_core_err(inactive,
- "Failed to create lag drop rule, error: %d", err);
}
-static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 v2p_port1, u8 v2p_port2)
+static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED)
- return mlx5_lag_port_sel_modify(ldev, v2p_port1, v2p_port2);
- return mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
+ return mlx5_lag_port_sel_modify(ldev, ports);
+ return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
}
void mlx5_modify_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker)
{
+ u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
- u8 v2p_port1, v2p_port2;
+ int idx;
int err;
+ int i;
+ int j;
- mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
- &v2p_port2);
+ mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);
- if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
- v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
- err = _mlx5_modify_lag(ldev, v2p_port1, v2p_port2);
- if (err) {
- mlx5_core_err(dev0,
- "Failed to modify LAG (%d)\n",
- err);
- return;
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ if (ports[idx] == ldev->v2p_map[idx])
+ continue;
+ err = _mlx5_modify_lag(ldev, ports);
+ if (err) {
+ mlx5_core_err(dev0,
+ "Failed to modify LAG (%d)\n",
+ err);
+ return;
+ }
+ memcpy(ldev->v2p_map, ports, sizeof(ports));
+
+ mlx5_lag_print_mapping(dev0, ldev, tracker,
+ ldev->flags);
+ break;
}
- ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
- ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;
- mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
- ldev->v2p_map[MLX5_LAG_P1],
- ldev->v2p_map[MLX5_LAG_P2]);
}
if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
@@ -307,20 +414,47 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
mlx5_lag_drop_rule_setup(ldev, tracker);
}
-static void mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
- struct lag_tracker *tracker, u8 *flags)
+#define MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED 4
+static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker, u8 *flags)
{
- bool roce_lag = !!(*flags & MLX5_LAG_FLAG_ROCE);
struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
- if (roce_lag ||
- !MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) ||
- tracker->tx_type != NETDEV_LAG_TX_TYPE_HASH)
- return;
- *flags |= MLX5_LAG_FLAG_HASH_BASED;
+ if (ldev->ports == MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED) {
+ /* Four ports are supported only in hash mode */
+ if (!MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table))
+ return -EINVAL;
+ *flags |= MLX5_LAG_FLAG_HASH_BASED;
+ if (ldev->ports > 2)
+ ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
+ }
+
+ return 0;
+}
+
+static int mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker, u8 *flags)
+{
+ struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
+
+ if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
+ tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
+ *flags |= MLX5_LAG_FLAG_HASH_BASED;
+
+ return 0;
+}
+
+static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker, u8 *flags)
+{
+ bool roce_lag = !!(*flags & MLX5_LAG_FLAG_ROCE);
+
+ if (roce_lag)
+ return mlx5_lag_set_port_sel_mode_roce(ldev, tracker, flags);
+ return mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, flags);
}
-static char *get_str_port_sel_mode(u8 flags)
+char *get_str_port_sel_mode(u8 flags)
{
if (flags & MLX5_LAG_FLAG_HASH_BASED)
return "hash";
@@ -336,12 +470,11 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
int err;
- mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d mode:%s",
- ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
+ mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
+ mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
shared_fdb, get_str_port_sel_mode(flags));
- err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
- ldev->v2p_map[MLX5_LAG_P2], shared_fdb, flags);
+ err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, shared_fdb, flags);
if (err) {
mlx5_core_err(dev0,
"Failed to create LAG (%d)\n",
@@ -377,13 +510,15 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
int err;
- mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
- &ldev->v2p_map[MLX5_LAG_P2]);
- mlx5_lag_set_port_sel_mode(ldev, tracker, &flags);
+ err = mlx5_lag_set_port_sel_mode(ldev, tracker, &flags);
+ if (err)
+ return err;
+
+ mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
+
if (flags & MLX5_LAG_FLAG_HASH_BASED) {
err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
- ldev->v2p_map[MLX5_LAG_P1],
- ldev->v2p_map[MLX5_LAG_P2]);
+ ldev->v2p_map);
if (err) {
mlx5_core_err(dev0,
"Failed to create LAG port selection(%d)\n",
@@ -455,25 +590,43 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
return 0;
}
+#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
- if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
- return false;
+#ifdef CONFIG_MLX5_ESWITCH
+ u8 mode;
+#endif
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ if (!ldev->pf[i].dev)
+ return false;
#ifdef CONFIG_MLX5_ESWITCH
- return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
- ldev->pf[MLX5_LAG_P2].dev);
+ mode = mlx5_eswitch_mode(ldev->pf[MLX5_LAG_P1].dev);
+
+ if (mode != MLX5_ESWITCH_NONE && mode != MLX5_ESWITCH_OFFLOADS)
+ return false;
+
+ for (i = 0; i < ldev->ports; i++)
+ if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
+ return false;
+
+ if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
+ return false;
#else
- return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
- !mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
+ for (i = 0; i < ldev->ports; i++)
+ if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
+ return false;
#endif
+ return true;
}
static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
int i;
- for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ for (i = 0; i < ldev->ports; i++) {
if (!ldev->pf[i].dev)
continue;
@@ -490,7 +643,7 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
int i;
- for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ for (i = 0; i < ldev->ports; i++) {
if (!ldev->pf[i].dev)
continue;
@@ -510,6 +663,7 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
bool shared_fdb = ldev->shared_fdb;
bool roce_lag;
int err;
+ int i;
roce_lag = __mlx5_lag_is_roce(ldev);
@@ -520,7 +674,8 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
}
- mlx5_nic_vport_disable_roce(dev1);
+ for (i = 1; i < ldev->ports; i++)
+ mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
}
err = mlx5_deactivate_lag(ldev);
@@ -557,6 +712,23 @@ static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
return false;
}
+static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
+{
+ bool roce_lag = true;
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
+
+#ifdef CONFIG_MLX5_ESWITCH
+ for (i = 0; i < ldev->ports; i++)
+ roce_lag = roce_lag &&
+ ldev->pf[i].dev->priv.eswitch->mode == MLX5_ESWITCH_NONE;
+#endif
+
+ return roce_lag;
+}
+
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
@@ -564,6 +736,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
struct lag_tracker tracker;
bool do_bond, roce_lag;
int err;
+ int i;
if (!mlx5_lag_is_ready(ldev)) {
do_bond = false;
@@ -580,14 +753,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
if (do_bond && !__mlx5_lag_is_active(ldev)) {
bool shared_fdb = mlx5_shared_fdb_supported(ldev);
- roce_lag = !mlx5_sriov_is_enabled(dev0) &&
- !mlx5_sriov_is_enabled(dev1);
-
-#ifdef CONFIG_MLX5_ESWITCH
- roce_lag = roce_lag &&
- dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
- dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
-#endif
+ roce_lag = mlx5_lag_is_roce_lag(ldev);
if (shared_fdb || roce_lag)
mlx5_lag_remove_devices(ldev);
@@ -604,7 +770,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
} else if (roce_lag) {
dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
- mlx5_nic_vport_enable_roce(dev1);
+ for (i = 1; i < ldev->ports; i++)
+ mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
} else if (shared_fdb) {
dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
@@ -636,31 +803,11 @@ static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}
-static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
- struct mlx5_core_dev *dev1)
-{
- if (dev0)
- mlx5_esw_lock(dev0->priv.eswitch);
- if (dev1)
- mlx5_esw_lock(dev1->priv.eswitch);
-}
-
-static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
- struct mlx5_core_dev *dev1)
-{
- if (dev1)
- mlx5_esw_unlock(dev1->priv.eswitch);
- if (dev0)
- mlx5_esw_unlock(dev0->priv.eswitch);
-}
-
static void mlx5_do_bond_work(struct work_struct *work)
{
struct delayed_work *delayed_work = to_delayed_work(work);
struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
bond_work);
- struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
- struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
int status;
status = mlx5_dev_list_trylock();
@@ -669,15 +816,16 @@ static void mlx5_do_bond_work(struct work_struct *work)
return;
}
+ mutex_lock(&ldev->lock);
if (ldev->mode_changes_in_progress) {
+ mutex_unlock(&ldev->lock);
mlx5_dev_list_unlock();
mlx5_queue_bond_work(ldev, HZ);
return;
}
- mlx5_lag_lock_eswitches(dev0, dev1);
mlx5_do_bond(ldev);
- mlx5_lag_unlock_eswitches(dev0, dev1);
+ mutex_unlock(&ldev->lock);
mlx5_dev_list_unlock();
}
@@ -691,7 +839,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
bool is_bonded, is_in_lag, mode_supported;
bool has_inactive = 0;
struct slave *slave;
- int bond_status = 0;
+ u8 bond_status = 0;
int num_slaves = 0;
int changed = 0;
int idx;
@@ -722,7 +870,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
rcu_read_unlock();
/* None of this lagdev's netdevs are slaves of this master. */
- if (!(bond_status & 0x3))
+ if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
return 0;
if (lag_upper_info) {
@@ -735,7 +883,8 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
* A device is considered bonded if both its physical ports are slaves
* of the same lag master, and only them.
*/
- is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;
+ is_in_lag = num_slaves == ldev->ports &&
+ bond_status == GENMASK(ldev->ports - 1, 0);
/* Lag mode must be activebackup or hash. */
mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
@@ -864,7 +1013,7 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
{
unsigned int fn = mlx5_get_dev_index(dev);
- if (fn >= MLX5_MAX_PORTS)
+ if (fn >= ldev->ports)
return;
spin_lock(&lag_lock);
@@ -880,7 +1029,7 @@ static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
int i;
spin_lock(&lag_lock);
- for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ for (i = 0; i < ldev->ports; i++) {
if (ldev->pf[i].netdev == netdev) {
ldev->pf[i].netdev = NULL;
break;
@@ -894,24 +1043,23 @@ static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
{
unsigned int fn = mlx5_get_dev_index(dev);
- if (fn >= MLX5_MAX_PORTS)
+ if (fn >= ldev->ports)
return;
ldev->pf[fn].dev = dev;
dev->priv.lag = ldev;
}
-/* Must be called with intf_mutex held */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
struct mlx5_core_dev *dev)
{
int i;
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < ldev->ports; i++)
if (ldev->pf[i].dev == dev)
break;
- if (i == MLX5_MAX_PORTS)
+ if (i == ldev->ports)
return;
ldev->pf[i].dev = NULL;
@@ -924,12 +1072,7 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
struct mlx5_lag *ldev = NULL;
struct mlx5_core_dev *tmp_dev;
- if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
- !MLX5_CAP_GEN(dev, lag_master) ||
- MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
- return 0;
-
- tmp_dev = mlx5_get_next_phys_dev(dev);
+ tmp_dev = mlx5_get_next_phys_dev_lag(dev);
if (tmp_dev)
ldev = tmp_dev->priv.lag;
@@ -939,13 +1082,18 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
mlx5_core_err(dev, "Failed to alloc lag dev\n");
return 0;
}
- } else {
- if (ldev->mode_changes_in_progress)
- return -EAGAIN;
- mlx5_ldev_get(ldev);
+ mlx5_ldev_add_mdev(ldev, dev);
+ return 0;
}
+ mutex_lock(&ldev->lock);
+ if (ldev->mode_changes_in_progress) {
+ mutex_unlock(&ldev->lock);
+ return -EAGAIN;
+ }
+ mlx5_ldev_get(ldev);
mlx5_ldev_add_mdev(ldev, dev);
+ mutex_unlock(&ldev->lock);
return 0;
}
@@ -958,15 +1106,19 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
if (!ldev)
return;
+ /* mdev is being removed; might as well remove debugfs
+ * as early as possible.
+ */
+ mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
- mlx5_dev_list_lock();
+ mutex_lock(&ldev->lock);
if (ldev->mode_changes_in_progress) {
- mlx5_dev_list_unlock();
+ mutex_unlock(&ldev->lock);
msleep(100);
goto recheck;
}
mlx5_ldev_remove_mdev(ldev, dev);
- mlx5_dev_list_unlock();
+ mutex_unlock(&ldev->lock);
mlx5_ldev_put(ldev);
}
@@ -974,35 +1126,45 @@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
int err;
+ if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+ !MLX5_CAP_GEN(dev, lag_master) ||
+ (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
+ MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
+ return;
+
recheck:
mlx5_dev_list_lock();
err = __mlx5_lag_dev_add_mdev(dev);
+ mlx5_dev_list_unlock();
+
if (err) {
- mlx5_dev_list_unlock();
msleep(100);
goto recheck;
}
- mlx5_dev_list_unlock();
+ mlx5_ldev_add_debugfs(dev);
}
-/* Must be called with intf_mutex held */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
struct net_device *netdev)
{
struct mlx5_lag *ldev;
+ bool lag_is_active;
ldev = mlx5_lag_dev(dev);
if (!ldev)
return;
+ mutex_lock(&ldev->lock);
mlx5_ldev_remove_netdev(ldev, netdev);
ldev->flags &= ~MLX5_LAG_FLAG_READY;
- if (__mlx5_lag_is_active(ldev))
+ lag_is_active = __mlx5_lag_is_active(ldev);
+ mutex_unlock(&ldev->lock);
+
+ if (lag_is_active)
mlx5_queue_bond_work(ldev, 0);
}
-/* Must be called with intf_mutex held */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
struct net_device *netdev)
{
@@ -1013,14 +1175,16 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
if (!ldev)
return;
+ mutex_lock(&ldev->lock);
mlx5_ldev_add_netdev(ldev, dev, netdev);
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < ldev->ports; i++)
if (!ldev->pf[i].dev)
break;
- if (i >= MLX5_MAX_PORTS)
+ if (i >= ldev->ports)
ldev->flags |= MLX5_LAG_FLAG_READY;
+ mutex_unlock(&ldev->lock);
mlx5_queue_bond_work(ldev, 0);
}
@@ -1097,8 +1261,6 @@ EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
- struct mlx5_core_dev *dev0;
- struct mlx5_core_dev *dev1;
struct mlx5_lag *ldev;
ldev = mlx5_lag_dev(dev);
@@ -1106,16 +1268,13 @@ void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
return;
mlx5_dev_list_lock();
-
- dev0 = ldev->pf[MLX5_LAG_P1].dev;
- dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ mutex_lock(&ldev->lock);
ldev->mode_changes_in_progress++;
- if (__mlx5_lag_is_active(ldev)) {
- mlx5_lag_lock_eswitches(dev0, dev1);
+ if (__mlx5_lag_is_active(ldev))
mlx5_disable_lag(ldev);
- mlx5_lag_unlock_eswitches(dev0, dev1);
- }
+
+ mutex_unlock(&ldev->lock);
mlx5_dev_list_unlock();
}
@@ -1127,9 +1286,9 @@ void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
if (!ldev)
return;
- mlx5_dev_list_lock();
+ mutex_lock(&ldev->lock);
ldev->mode_changes_in_progress--;
- mlx5_dev_list_unlock();
+ mutex_unlock(&ldev->lock);
mlx5_queue_bond_work(ldev, 0);
}
@@ -1137,6 +1296,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
struct net_device *ndev = NULL;
struct mlx5_lag *ldev;
+ int i;
spin_lock(&lag_lock);
ldev = mlx5_lag_dev(dev);
@@ -1145,9 +1305,11 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
goto unlock;
if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
- ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
- ldev->pf[MLX5_LAG_P1].netdev :
- ldev->pf[MLX5_LAG_P2].netdev;
+ for (i = 0; i < ldev->ports; i++)
+ if (ldev->tracker.netdev_state[i].tx_enabled)
+ ndev = ldev->pf[i].netdev;
+ if (!ndev)
+ ndev = ldev->pf[ldev->ports - 1].netdev;
} else {
ndev = ldev->pf[MLX5_LAG_P1].netdev;
}
@@ -1166,18 +1328,21 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
{
struct mlx5_lag *ldev;
u8 port = 0;
+ int i;
spin_lock(&lag_lock);
ldev = mlx5_lag_dev(dev);
if (!(ldev && __mlx5_lag_is_roce(ldev)))
goto unlock;
- if (ldev->pf[MLX5_LAG_P1].netdev == slave)
- port = MLX5_LAG_P1;
- else
- port = MLX5_LAG_P2;
+ for (i = 0; i < ldev->ports; i++) {
+ if (ldev->pf[i].netdev == slave) {
+ port = i;
+ break;
+ }
+ }
- port = ldev->v2p_map[port];
+ port = ldev->v2p_map[port * ldev->buckets];
unlock:
spin_unlock(&lag_lock);
@@ -1185,6 +1350,18 @@ unlock:
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);
+u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return 0;
+
+ return ldev->ports;
+}
+EXPORT_SYMBOL(mlx5_lag_get_num_ports);
+
struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
struct mlx5_core_dev *peer_dev = NULL;
@@ -1211,7 +1388,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
size_t *offsets)
{
int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
- struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
+ struct mlx5_core_dev **mdev;
struct mlx5_lag *ldev;
int num_ports;
int ret, i, j;
@@ -1221,14 +1398,20 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
if (!out)
return -ENOMEM;
+ mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
+ if (!mdev) {
+ ret = -ENOMEM;
+ goto free_out;
+ }
+
memset(values, 0, sizeof(*values) * num_counters);
spin_lock(&lag_lock);
ldev = mlx5_lag_dev(dev);
if (ldev && __mlx5_lag_is_active(ldev)) {
- num_ports = MLX5_MAX_PORTS;
- mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
- mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
+ num_ports = ldev->ports;
+ for (i = 0; i < ldev->ports; i++)
+ mdev[i] = ldev->pf[i].dev;
} else {
num_ports = 1;
mdev[MLX5_LAG_P1] = dev;
@@ -1243,13 +1426,15 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
out);
if (ret)
- goto free;
+ goto free_mdev;
for (j = 0; j < num_counters; ++j)
values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
}
-free:
+free_mdev:
+ kvfree(mdev);
+free_out:
kvfree(out);
return ret;
}
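Throughout the new code, v2p_map is port-major: each port owns ldev->buckets consecutive slots, matching the idx = i * ldev->buckets + j computation in the loops above. A minimal sketch of the slot arithmetic (hypothetical helper, not in the patch):

	static inline int v2p_slot(int port, int bucket, int buckets)
	{
		/* a port's slots are contiguous: [port * buckets, port * buckets + buckets) */
		return port * buckets + bucket;
	}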
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index cbf9a9003e55..46683b84ff84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -4,6 +4,9 @@
#ifndef __MLX5_LAG_H__
#define __MLX5_LAG_H__
+#include <linux/debugfs.h>
+
+#define MLX5_LAG_MAX_HASH_BUCKETS 16
#include "mlx5_core.h"
#include "mp.h"
#include "port_sel.h"
@@ -45,9 +48,11 @@ struct lag_tracker {
*/
struct mlx5_lag {
u8 flags;
+ u8 ports;
+ u8 buckets;
int mode_changes_in_progress;
bool shared_fdb;
- u8 v2p_map[MLX5_MAX_PORTS];
+ u8 v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS];
struct kref ref;
struct lag_func pf[MLX5_MAX_PORTS];
struct lag_tracker tracker;
@@ -56,6 +61,8 @@ struct mlx5_lag {
struct notifier_block nb;
struct lag_mp lag_mp;
struct mlx5_lag_port_sel port_sel;
+ /* Protect lag fields/state changes */
+ struct mutex lock;
};
static inline struct mlx5_lag *
@@ -85,4 +92,11 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
struct net_device *ndev);
+char *get_str_port_sel_mode(u8 flags);
+void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
+ u8 *ports, int *num_enabled);
+
+void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev);
+void mlx5_ldev_remove_debugfs(struct dentry *dbg);
+
#endif /* __MLX5_LAG_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
index 5be322528279..d3a3fe4ce670 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -12,7 +12,8 @@ enum {
static struct mlx5_flow_group *
mlx5_create_hash_flow_group(struct mlx5_flow_table *ft,
- struct mlx5_flow_definer *definer)
+ struct mlx5_flow_definer *definer,
+ u8 rules)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_group *fg;
@@ -25,7 +26,7 @@ mlx5_create_hash_flow_group(struct mlx5_flow_table *ft,
MLX5_SET(create_flow_group_in, in, match_definer_id,
mlx5_get_match_definer_id(definer));
MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
- MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_MAX_PORTS - 1);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, rules - 1);
MLX5_SET(create_flow_group_in, in, group_type,
MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT);
@@ -36,7 +37,7 @@ mlx5_create_hash_flow_group(struct mlx5_flow_table *ft,
static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
struct mlx5_lag_definer *lag_definer,
- u8 port1, u8 port2)
+ u8 *ports)
{
struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_flow_table_attr ft_attr = {};
@@ -44,8 +45,10 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_namespace *ns;
int err, i;
+ int idx;
+ int j;
- ft_attr.max_fte = MLX5_MAX_PORTS;
+ ft_attr.max_fte = ldev->ports * ldev->buckets;
ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER;
ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_PORT_SEL);
@@ -61,7 +64,8 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
}
lag_definer->fg = mlx5_create_hash_flow_group(lag_definer->ft,
- lag_definer->definer);
+ lag_definer->definer,
+ ft_attr.max_fte);
if (IS_ERR(lag_definer->fg)) {
err = PTR_ERR(lag_definer->fg);
goto destroy_ft;
@@ -70,19 +74,25 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
flow_act.flags |= FLOW_ACT_NO_APPEND;
- for (i = 0; i < MLX5_MAX_PORTS; i++) {
- u8 affinity = i == 0 ? port1 : port2;
-
- dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev,
- vhca_id);
- lag_definer->rules[i] = mlx5_add_flow_rules(lag_definer->ft,
- NULL, &flow_act,
- &dest, 1);
- if (IS_ERR(lag_definer->rules[i])) {
- err = PTR_ERR(lag_definer->rules[i]);
- while (i--)
- mlx5_del_flow_rules(lag_definer->rules[i]);
- goto destroy_fg;
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ u8 affinity;
+
+ idx = i * ldev->buckets + j;
+ affinity = ports[idx];
+
+ dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev,
+ vhca_id);
+ lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft,
+ NULL, &flow_act,
+ &dest, 1);
+ if (IS_ERR(lag_definer->rules[idx])) {
+ err = PTR_ERR(lag_definer->rules[idx]);
+ while (i--)
+ while (j--)
+ mlx5_del_flow_rules(lag_definer->rules[idx]);
+ goto destroy_fg;
+ }
}
}
@@ -279,8 +289,7 @@ static int mlx5_lag_set_definer(u32 *match_definer_mask,
static struct mlx5_lag_definer *
mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash,
- enum mlx5_traffic_types tt, bool tunnel, u8 port1,
- u8 port2)
+ enum mlx5_traffic_types tt, bool tunnel, u8 *ports)
{
struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_lag_definer *lag_definer;
@@ -308,7 +317,7 @@ mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash,
goto free_mask;
}
- err = mlx5_lag_create_port_sel_table(ldev, lag_definer, port1, port2);
+ err = mlx5_lag_create_port_sel_table(ldev, lag_definer, ports);
if (err)
goto destroy_match_definer;
@@ -329,10 +338,16 @@ static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev,
struct mlx5_lag_definer *lag_definer)
{
struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ int idx;
int i;
+ int j;
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- mlx5_del_flow_rules(lag_definer->rules[i]);
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ mlx5_del_flow_rules(lag_definer->rules[idx]);
+ }
+ }
mlx5_destroy_flow_group(lag_definer->fg);
mlx5_destroy_flow_table(lag_definer->ft);
mlx5_destroy_match_definer(dev, lag_definer->definer);
@@ -356,7 +371,7 @@ static void mlx5_lag_destroy_definers(struct mlx5_lag *ldev)
static int mlx5_lag_create_definers(struct mlx5_lag *ldev,
enum netdev_lag_hash hash_type,
- u8 port1, u8 port2)
+ u8 *ports)
{
struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
struct mlx5_lag_definer *lag_definer;
@@ -364,7 +379,7 @@ static int mlx5_lag_create_definers(struct mlx5_lag *ldev,
for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
lag_definer = mlx5_lag_create_definer(ldev, hash_type, tt,
- false, port1, port2);
+ false, ports);
if (IS_ERR(lag_definer)) {
err = PTR_ERR(lag_definer);
goto destroy_definers;
@@ -376,7 +391,7 @@ static int mlx5_lag_create_definers(struct mlx5_lag *ldev,
lag_definer =
mlx5_lag_create_definer(ldev, hash_type, tt,
- true, port1, port2);
+ true, ports);
if (IS_ERR(lag_definer)) {
err = PTR_ERR(lag_definer);
goto destroy_definers;
@@ -513,13 +528,13 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev)
}
int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
- enum netdev_lag_hash hash_type, u8 port1, u8 port2)
+ enum netdev_lag_hash hash_type, u8 *ports)
{
struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
int err;
set_tt_map(port_sel, hash_type);
- err = mlx5_lag_create_definers(ldev, hash_type, port1, port2);
+ err = mlx5_lag_create_definers(ldev, hash_type, ports);
if (err)
return err;
@@ -543,52 +558,62 @@ destroy_definers:
return err;
}
-static int
-mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
- struct mlx5_lag_definer **definers,
- u8 port1, u8 port2)
+static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
+ struct mlx5_lag_definer *def,
+ u8 *ports)
{
- struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
struct mlx5_flow_destination dest = {};
+ int idx;
int err;
- int tt;
+ int i;
+ int j;
dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
- for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
- struct mlx5_flow_handle **rules = definers[tt]->rules;
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ if (ldev->v2p_map[idx] == ports[idx])
+ continue;
- if (ldev->v2p_map[MLX5_LAG_P1] != port1) {
- dest.vport.vhca_id =
- MLX5_CAP_GEN(ldev->pf[port1 - 1].dev, vhca_id);
- err = mlx5_modify_rule_destination(rules[MLX5_LAG_P1],
- &dest, NULL);
+ dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev,
+ vhca_id);
+ err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL);
if (err)
return err;
}
+ }
- if (ldev->v2p_map[MLX5_LAG_P2] != port2) {
- dest.vport.vhca_id =
- MLX5_CAP_GEN(ldev->pf[port2 - 1].dev, vhca_id);
- err = mlx5_modify_rule_destination(rules[MLX5_LAG_P2],
- &dest, NULL);
- if (err)
- return err;
- }
+ return 0;
+}
+
+static int
+mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
+ struct mlx5_lag_definer **definers,
+ u8 *ports)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ int err;
+ int tt;
+
+ for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
+ err = __mlx5_lag_modify_definers_destinations(ldev, definers[tt], ports);
+ if (err)
+ return err;
}
return 0;
}
-int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2)
+int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports)
{
struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
int err;
err = mlx5_lag_modify_definers_destinations(ldev,
port_sel->outer.definers,
- port1, port2);
+ ports);
if (err)
return err;
@@ -597,7 +622,7 @@ int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2)
return mlx5_lag_modify_definers_destinations(ldev,
port_sel->inner.definers,
- port1, port2);
+ ports);
}
void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h
index 6d15b28a42fc..5ec3af2a3ecd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h
@@ -10,7 +10,10 @@ struct mlx5_lag_definer {
struct mlx5_flow_definer *definer;
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg;
- struct mlx5_flow_handle *rules[MLX5_MAX_PORTS];
+ /* Each port has ldev->buckets number of rules and they are arranged in
+ * [port * buckets .. port * buckets + buckets) locations
+ */
+ struct mlx5_flow_handle *rules[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS];
};
struct mlx5_lag_ttc {
@@ -27,22 +30,20 @@ struct mlx5_lag_port_sel {
#ifdef CONFIG_MLX5_ESWITCH
-int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2);
+int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports);
void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev);
int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
- enum netdev_lag_hash hash_type, u8 port1,
- u8 port2);
+ enum netdev_lag_hash hash_type, u8 *ports);
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
enum netdev_lag_hash hash_type,
- u8 port1, u8 port2)
+ u8 *ports)
{
return 0;
}
-static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1,
- u8 port2)
+static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports)
{
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
index bced2efe9bef..adefde3ea941 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
@@ -14,7 +14,7 @@ static LIST_HEAD(devcom_list);
struct mlx5_devcom_component {
struct {
void *data;
- } device[MLX5_MAX_PORTS];
+ } device[MLX5_DEVCOM_PORTS_SUPPORTED];
mlx5_devcom_event_handler_t handler;
struct rw_semaphore sem;
@@ -25,7 +25,7 @@ struct mlx5_devcom_list {
struct list_head list;
struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS];
- struct mlx5_core_dev *devs[MLX5_MAX_PORTS];
+ struct mlx5_core_dev *devs[MLX5_DEVCOM_PORTS_SUPPORTED];
};
struct mlx5_devcom {
@@ -74,13 +74,15 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
if (!mlx5_core_is_pf(dev))
return NULL;
+ if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED)
+ return NULL;
sguid0 = mlx5_query_nic_system_image_guid(dev);
list_for_each_entry(iter, &devcom_list, list) {
struct mlx5_core_dev *tmp_dev = NULL;
idx = -1;
- for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) {
if (iter->devs[i])
tmp_dev = iter->devs[i];
else
@@ -134,11 +136,11 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
kfree(devcom);
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
if (priv->devs[i])
break;
- if (i != MLX5_MAX_PORTS)
+ if (i != MLX5_DEVCOM_PORTS_SUPPORTED)
return;
list_del(&priv->list);
@@ -191,7 +193,7 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_write(&comp->sem);
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
if (i != devcom->idx && comp->device[i].data) {
err = comp->handler(event, comp->device[i].data,
event_data);
@@ -239,7 +241,7 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
return NULL;
}
- for (i = 0; i < MLX5_MAX_PORTS; i++)
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
if (i != devcom->idx)
break;
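
devcom pairing still supports exactly two devices, so the registration path above now bails out when num_lag_ports differs from MLX5_DEVCOM_PORTS_SUPPORTED instead of sizing its arrays by the enlarged MLX5_MAX_PORTS. A minimal sketch of that gate; the fake_dev struct stands in for the real capability read and is not the driver's API.

#include <stdbool.h>
#include <stdio.h>

#define MLX5_DEVCOM_PORTS_SUPPORTED 2

struct fake_dev {
	unsigned int num_lag_ports; /* stands in for MLX5_CAP_GEN(dev, num_lag_ports) */
};

/* Mirrors the new early return in mlx5_devcom_register_device(). */
static bool devcom_would_register(const struct fake_dev *dev)
{
	return dev->num_lag_ports == MLX5_DEVCOM_PORTS_SUPPORTED;
}

int main(void)
{
	struct fake_dev two = { .num_lag_ports = 2 };
	struct fake_dev four = { .num_lag_ports = 4 };

	printf("2-port device registers with devcom: %s\n",
	       devcom_would_register(&two) ? "yes" : "no");
	printf("4-port device registers with devcom: %s\n",
	       devcom_would_register(&four) ? "yes" : "no");
	return 0;
}
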
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
index 939d5bf1581b..94313c18bb64 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
@@ -6,6 +6,8 @@
#include <linux/mlx5/driver.h>
+#define MLX5_DEVCOM_PORTS_SUPPORTED 2
+
enum mlx5_devcom_components {
MLX5_DEVCOM_ESW_OFFLOADS,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
index c1df0d3595d8..d758848d34d0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
@@ -10,6 +10,7 @@ struct mlx5_timeouts {
static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = {
[MLX5_TO_FW_PRE_INIT_TIMEOUT_MS] = 120000,
+ [MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS] = 7200000,
[MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000,
[MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2,
[MLX5_TO_FW_INIT_MS] = 2000,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
index 1c42ead782fa..257c03eeab36 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
@@ -7,6 +7,7 @@
enum mlx5_timeouts_types {
/* pre init timeouts (not read from FW) */
MLX5_TO_FW_PRE_INIT_TIMEOUT_MS,
+ MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS,
MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS,
MLX5_TO_FW_PRE_INIT_WAIT_MS,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 35e48ef04845..84f75aa25214 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -189,7 +189,8 @@ static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
fw_initializing = ioread32be(&dev->iseg->initializing);
if (!(fw_initializing >> 31))
break;
- if (time_after(jiffies, end)) {
+ if (time_after(jiffies, end) ||
+ test_and_clear_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
err = -EBUSY;
break;
}
@@ -1002,7 +1003,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_devcom_unregister_device(dev->priv.devcom);
}
-static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
+static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout)
{
int err;
@@ -1017,11 +1018,11 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
/* wait for firmware to accept initialization segments configurations
*/
- err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT),
+ err = wait_fw_init(dev, timeout,
mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL));
if (err) {
mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n",
- mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
+ timeout);
return err;
}
@@ -1271,7 +1272,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
mutex_lock(&dev->intf_state_mutex);
dev->state = MLX5_DEVICE_STATE_UP;
- err = mlx5_function_setup(dev, true);
+ err = mlx5_function_setup(dev, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
if (err)
goto err_function;
@@ -1335,9 +1336,10 @@ out:
mutex_unlock(&dev->intf_state_mutex);
}
-int mlx5_load_one(struct mlx5_core_dev *dev)
+int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery)
{
int err = 0;
+ u64 timeout;
mutex_lock(&dev->intf_state_mutex);
if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
@@ -1347,7 +1349,11 @@ int mlx5_load_one(struct mlx5_core_dev *dev)
/* remove any previous indication of internal error */
dev->state = MLX5_DEVICE_STATE_UP;
- err = mlx5_function_setup(dev, false);
+ if (recovery)
+ timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT);
+ else
+ timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT);
+ err = mlx5_function_setup(dev, timeout);
if (err)
goto err_function;
@@ -1602,6 +1608,7 @@ static void remove_one(struct pci_dev *pdev)
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct devlink *devlink = priv_to_devlink(dev);
+ set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
devlink_unregister(devlink);
mlx5_sriov_disable(pdev);
mlx5_crdump_disable(dev);
@@ -1717,7 +1724,7 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
mlx5_pci_trace(dev, "Enter, loading driver..\n");
- err = mlx5_load_one(dev);
+ err = mlx5_load_one(dev, false);
mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err,
!err ? "recovered" : "Failed");
@@ -1785,6 +1792,7 @@ static void shutdown(struct pci_dev *pdev)
int err;
mlx5_core_info(dev, "Shutdown was called\n");
+ set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
err = mlx5_try_fast_unload(dev);
if (err)
mlx5_unload_one(dev);
@@ -1804,7 +1812,7 @@ static int mlx5_resume(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
- return mlx5_load_one(dev);
+ return mlx5_load_one(dev, false);
}
static const struct pci_device_id mlx5_core_pci_table[] = {
@@ -1849,7 +1857,7 @@ int mlx5_recover_device(struct mlx5_core_dev *dev)
return -EIO;
}
- return mlx5_load_one(dev);
+ return mlx5_load_one(dev, true);
}
static struct pci_driver mlx5_core_driver = {
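
The main.c changes combine two mechanisms: mlx5_load_one() picks a much longer firmware pre-init timeout when called on the recovery path, and remove()/shutdown() set MLX5_BREAK_FW_WAIT so wait_fw_init() can abort that (potentially two-hour) wait early. A userspace sketch of both, using the millisecond values from tout_def_sw_val[] above; the helper names are illustrative, not the driver's API.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FW_PRE_INIT_TIMEOUT_MS             120000ULL   /* 2 minutes */
#define FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS 7200000ULL  /* 2 hours   */

static atomic_bool break_fw_wait; /* models the MLX5_BREAK_FW_WAIT bit */

/* Mirrors the recovery/non-recovery split added to mlx5_load_one(). */
static uint64_t pick_pre_init_timeout(bool recovery)
{
	return recovery ? FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS
			: FW_PRE_INIT_TIMEOUT_MS;
}

/* Mirrors the two exit conditions in wait_fw_init(): the deadline has
 * passed, or someone (remove/shutdown) requested the wait be broken.
 */
static bool should_stop_waiting(uint64_t now_ms, uint64_t deadline_ms)
{
	return now_ms > deadline_ms ||
	       atomic_exchange(&break_fw_wait, false);
}

int main(void)
{
	uint64_t timeout = pick_pre_init_timeout(true);

	printf("recovery pre-init timeout: %llu ms\n",
	       (unsigned long long)timeout);
	atomic_store(&break_fw_wait, true); /* device removal requested */
	printf("wait aborted early: %s\n",
	       should_stop_waiting(0, timeout) ? "yes" : "no");
	return 0;
}
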
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index a9b2d6ead542..484cb1e4fc7f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -210,6 +210,7 @@ void mlx5_detach_device(struct mlx5_core_dev *dev);
int mlx5_register_device(struct mlx5_core_dev *dev);
void mlx5_unregister_device(struct mlx5_core_dev *dev);
struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
+struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev);
void mlx5_dev_list_lock(void);
void mlx5_dev_list_unlock(void);
int mlx5_dev_list_trylock(void);
@@ -290,7 +291,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev);
int mlx5_init_one(struct mlx5_core_dev *dev);
void mlx5_uninit_one(struct mlx5_core_dev *dev);
void mlx5_unload_one(struct mlx5_core_dev *dev);
-int mlx5_load_one(struct mlx5_core_dev *dev);
+int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery);
int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index ff47d49d8be4..d6bac3976913 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -84,7 +84,7 @@ enum mlx5_sqp_t {
};
enum {
- MLX5_MAX_PORTS = 2,
+ MLX5_MAX_PORTS = 4,
};
enum {
@@ -558,6 +558,7 @@ struct mlx5_debugfs_entries {
struct dentry *cq_debugfs;
struct dentry *cmdif_debugfs;
struct dentry *pages_debugfs;
+ struct dentry *lag_debugfs;
};
struct mlx5_ft_pool;
@@ -632,6 +633,7 @@ enum mlx5_device_state {
enum mlx5_interface_state {
MLX5_INTERFACE_STATE_UP = BIT(0),
+ MLX5_BREAK_FW_WAIT = BIT(1),
};
enum mlx5_pci_status {
@@ -1141,6 +1143,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
int num_counters,
size_t *offsets);
struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev);
+u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev);
struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,