author	Mark Bloch <mbloch@nvidia.com>	2022-03-02 15:38:50 +0000
committer	Saeed Mahameed <saeedm@nvidia.com>	2022-05-09 22:54:03 -0700
commit	352899f384d4aefa77ede6310d08c1b515612a8f (patch)
tree	486d6c352319ac5ad040482ca098b1d33adba275 /drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
parent	24b3599effe2b1eda7bc7e8b2b5e8fe459256222 (diff)
net/mlx5: Lag, use buckets in hash mode
When in hardware LAG mode and the NIC has more than two ports, traffic needs to be distributed between the remaining active ports when one port goes down. For a better spread in such cases, instead of using a 1-to-1 mapping with only 4 slots in the hash, use many slots: each port has many slots that point to it. When a port goes down, go over all the slots that pointed to that port and spread them between the remaining active ports. Once the port comes back up, restore the default mapping.

There will be number_of_ports * MLX5_LAG_MAX_HASH_BUCKETS slots, and each group of MLX5_LAG_MAX_HASH_BUCKETS slots belongs to a different port. The native mapping is such that:

port 1: the first MLX5_LAG_MAX_HASH_BUCKETS slots are [1, 1, ..., 1], which means that if a packet is hashed into one of these slots it will hit the wire via port 1.

port 2: the second MLX5_LAG_MAX_HASH_BUCKETS slots are [2, 2, ..., 2], which means that if a packet is hashed into one of these slots it will hit the wire via port 2.

The same mapping applies to the rest of the ports.

On a failover, let's say port 2 goes down (ports 1, 3 and 4 are still up). The new mapping for port 2 will be:

port 2: the second MLX5_LAG_MAX_HASH_BUCKETS slots are [1, 3, 1, 4, ..., 4], which means the mapping was changed from the native mapping to one that consists of only the active ports.

With this, if a port goes down its traffic will be split between the active ports.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
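To illustrate the mapping described above, here is a minimal userspace C sketch. It is not driver code: lag_build_map(), lag_failover() and the simple round-robin spread are illustrative stand-ins for what the driver actually computes, and a bucket count of 8 is assumed for brevity.

/*
 * Minimal userspace sketch of the slot mapping described above.
 * Not driver code: lag_build_map(), lag_failover() and the simple
 * round-robin spread are illustrative stand-ins only.
 */
#include <stdio.h>
#include <stdbool.h>

#define NPORTS 4
#define BUCKETS 8	/* stands in for MLX5_LAG_MAX_HASH_BUCKETS */

/* Native mapping: every bucket of port i points back at port i (1-based). */
static void lag_build_map(unsigned char map[NPORTS * BUCKETS])
{
	for (int i = 0; i < NPORTS; i++)
		for (int j = 0; j < BUCKETS; j++)
			map[i * BUCKETS + j] = i + 1;
}

/* Spread the buckets of each down port across the active ports. */
static void lag_failover(unsigned char map[NPORTS * BUCKETS], const bool up[NPORTS])
{
	int next = 0;

	for (int i = 0; i < NPORTS; i++) {
		if (up[i])
			continue;
		for (int j = 0; j < BUCKETS; j++) {
			do	/* pick the next active port, round robin */
				next = (next + 1) % NPORTS;
			while (!up[next]);
			map[i * BUCKETS + j] = next + 1;
		}
	}
}

int main(void)
{
	unsigned char map[NPORTS * BUCKETS];
	bool up[NPORTS] = { true, false, true, true };	/* port 2 down */

	lag_build_map(map);
	lag_failover(map, up);

	/* Port 2's slots now read 3 4 1 3 4 1 3 4: only active ports. */
	for (int j = 0; j < BUCKETS; j++)
		printf("%d ", map[1 * BUCKETS + j]);
	printf("\n");
	return 0;
}

When the port comes back up the driver restores the native mapping, which the sketch would mirror by simply calling lag_build_map() again.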
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c')
-rw-r--r--	drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c	95
1 file changed, 62 insertions(+), 33 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
index 478b4ef723f8..d3a3fe4ce670 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -13,7 +13,7 @@ enum {
static struct mlx5_flow_group *
mlx5_create_hash_flow_group(struct mlx5_flow_table *ft,
struct mlx5_flow_definer *definer,
- u8 ports)
+ u8 rules)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_group *fg;
@@ -26,7 +26,7 @@ mlx5_create_hash_flow_group(struct mlx5_flow_table *ft,
MLX5_SET(create_flow_group_in, in, match_definer_id,
mlx5_get_match_definer_id(definer));
MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
- MLX5_SET(create_flow_group_in, in, end_flow_index, ports - 1);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, rules - 1);
MLX5_SET(create_flow_group_in, in, group_type,
MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT);
@@ -45,8 +45,10 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_namespace *ns;
int err, i;
+ int idx;
+ int j;
- ft_attr.max_fte = ldev->ports;
+ ft_attr.max_fte = ldev->ports * ldev->buckets;
ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER;
ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_PORT_SEL);
@@ -63,7 +65,7 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
lag_definer->fg = mlx5_create_hash_flow_group(lag_definer->ft,
lag_definer->definer,
- ldev->ports);
+ ft_attr.max_fte);
if (IS_ERR(lag_definer->fg)) {
err = PTR_ERR(lag_definer->fg);
goto destroy_ft;
@@ -73,18 +75,28 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
flow_act.flags |= FLOW_ACT_NO_APPEND;
for (i = 0; i < ldev->ports; i++) {
- u8 affinity = ports[i];
-
- dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev,
- vhca_id);
- lag_definer->rules[i] = mlx5_add_flow_rules(lag_definer->ft,
- NULL, &flow_act,
- &dest, 1);
- if (IS_ERR(lag_definer->rules[i])) {
- err = PTR_ERR(lag_definer->rules[i]);
- while (i--)
- mlx5_del_flow_rules(lag_definer->rules[i]);
- goto destroy_fg;
+ for (j = 0; j < ldev->buckets; j++) {
+ u8 affinity;
+
+ idx = i * ldev->buckets + j;
+ affinity = ports[idx];
+
+ dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev,
+ vhca_id);
+ lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft,
+ NULL, &flow_act,
+ &dest, 1);
+ if (IS_ERR(lag_definer->rules[idx])) {
+ err = PTR_ERR(lag_definer->rules[idx]);
+ do {
+ while (j--) {
+ idx = i * ldev->buckets + j;
+ mlx5_del_flow_rules(lag_definer->rules[idx]);
+ }
+ j = ldev->buckets;
+ } while (i--);
+ goto destroy_fg;
+ }
}
}
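The hunk above flattens the (port, bucket) pair into a single index into lag_definer->rules[], and sizes the flow table (ft_attr.max_fte) so the hash-split group covers every slot. A small sketch of that row-major layout; the helper names are illustrative and not part of the driver:

/* Row-major slot layout; illustrative helpers only (not in the driver). */
static inline int lag_slot(int port, int bucket, int buckets)
{
	return port * buckets + bucket;	/* flat index into rules[] */
}

static inline int lag_slot_port(int idx, int buckets)
{
	return idx / buckets;		/* which port row a slot is in */
}

static inline int lag_slot_bucket(int idx, int buckets)
{
	return idx % buckets;		/* offset within that row */
}

This layout is also what the error path relies on: unwinding row by row while recomputing the flat index deletes exactly the rules created so far.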
@@ -330,10 +338,16 @@ static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev,
struct mlx5_lag_definer *lag_definer)
{
struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ int idx;
int i;
+ int j;
- for (i = 0; i < ldev->ports; i++)
- mlx5_del_flow_rules(lag_definer->rules[i]);
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ mlx5_del_flow_rules(lag_definer->rules[idx]);
+ }
+ }
mlx5_destroy_flow_group(lag_definer->fg);
mlx5_destroy_flow_table(lag_definer->ft);
mlx5_destroy_match_definer(dev, lag_definer->definer);
@@ -544,31 +558,28 @@ destroy_definers:
return err;
}
-static int
-mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
- struct mlx5_lag_definer **definers,
- u8 *ports)
+static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
+ struct mlx5_lag_definer *def,
+ u8 *ports)
{
- struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
struct mlx5_flow_destination dest = {};
+ int idx;
int err;
- int tt;
int i;
+ int j;
dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
- for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
- struct mlx5_flow_handle **rules = definers[tt]->rules;
-
- for (i = 0; i < ldev->ports; i++) {
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
- if (ldev->v2p_map[i] == ports[i])
- continue;
+ if (ldev->v2p_map[idx] == ports[idx])
+ continue;
- dest.vport.vhca_id =
- MLX5_CAP_GEN(ldev->pf[ports[i] - 1].dev,
- vhca_id);
- err = mlx5_modify_rule_destination(rules[i],
- &dest, NULL);
+
+ dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev,
+ vhca_id);
+ err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL);
if (err)
return err;
}
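The new per-slot modify loop only rewrites destinations for slots whose mapping actually changed, per the v2p_map[idx] == ports[idx] check above. A hedged sketch of that skip logic in isolation (u8 spelled as unsigned char; the helper name is made up):

/* Count the slots a remap would rewrite; mirrors the skip check above.
 * Illustrative only, not a driver function. */
static int lag_count_dirty_slots(const unsigned char *v2p_map,
				 const unsigned char *ports,
				 int nports, int buckets)
{
	int idx, dirty = 0;

	for (idx = 0; idx < nports * buckets; idx++)
		if (v2p_map[idx] != ports[idx])
			dirty++;	/* would call mlx5_modify_rule_destination() */

	return dirty;
}

Skipping unchanged slots keeps a failover or recovery down to the minimum number of firmware rule modifications.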
@@ -577,6 +588,24 @@ mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
return 0;
}
+static int
+mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
+ struct mlx5_lag_definer **definers,
+ u8 *ports)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ int err;
+ int tt;
+
+ for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
+ err = __mlx5_lag_modify_definers_destinations(ldev, definers[tt], ports);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports)
{
struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;