summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c313
1 files changed, 304 insertions, 9 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index ec706e9352e1..1066992c1503 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -64,11 +64,13 @@ static void esw_qos_domain_release(struct mlx5_eswitch *esw)
enum sched_node_type {
SCHED_NODE_TYPE_VPORTS_TSAR,
SCHED_NODE_TYPE_VPORT,
+ SCHED_NODE_TYPE_TC_ARBITER_TSAR,
};
static const char * const sched_node_type_str[] = {
[SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR",
[SCHED_NODE_TYPE_VPORT] = "vport",
+ [SCHED_NODE_TYPE_TC_ARBITER_TSAR] = "TC Arbiter TSAR",
};
struct mlx5_esw_sched_node {
@@ -106,6 +108,13 @@ static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node)
}
}
+static int esw_qos_num_tcs(struct mlx5_core_dev *dev)
+{
+ int num_tcs = mlx5_max_tc(dev) + 1;
+
+ return num_tcs < DEVLINK_RATE_TCS_MAX ? num_tcs : DEVLINK_RATE_TCS_MAX;
+}
+
static void
esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent)
{
@@ -116,6 +125,27 @@ esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_
esw_qos_node_attach_to_parent(node);
}
+static void esw_qos_nodes_set_parent(struct list_head *nodes,
+ struct mlx5_esw_sched_node *parent)
+{
+ struct mlx5_esw_sched_node *node, *tmp;
+
+ list_for_each_entry_safe(node, tmp, nodes, entry) {
+ esw_qos_node_set_parent(node, parent);
+ if (!list_empty(&node->children) &&
+ parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
+ struct mlx5_esw_sched_node *child;
+
+ list_for_each_entry(child, &node->children, entry) {
+ struct mlx5_vport *vport = child->vport;
+
+ if (vport)
+ vport->qos.sched_node->parent = parent;
+ }
+ }
+ }
+}
+
void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport)
{
kfree(vport->qos.sched_node);
@@ -141,16 +171,24 @@ mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport)
static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, const char *op)
{
- if (node->vport) {
+ switch (node->type) {
+ case SCHED_NODE_TYPE_VPORT:
esw_warn(node->esw->dev,
"E-Switch %s %s scheduling element failed (vport=%d,err=%d)\n",
op, sched_node_type_str[node->type], node->vport->vport, err);
- return;
+ break;
+ case SCHED_NODE_TYPE_TC_ARBITER_TSAR:
+ case SCHED_NODE_TYPE_VPORTS_TSAR:
+ esw_warn(node->esw->dev,
+ "E-Switch %s %s scheduling element failed (err=%d)\n",
+ op, sched_node_type_str[node->type], err);
+ break;
+ default:
+ esw_warn(node->esw->dev,
+ "E-Switch %s scheduling element failed (err=%d)\n",
+ op, err);
+ break;
}
-
- esw_warn(node->esw->dev,
- "E-Switch %s %s scheduling element failed (err=%d)\n",
- op, sched_node_type_str[node->type], err);
}
static int esw_qos_node_create_sched_element(struct mlx5_esw_sched_node *node, void *ctx,
@@ -388,6 +426,14 @@ __esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type
node->parent = parent;
INIT_LIST_HEAD(&node->children);
esw_qos_node_attach_to_parent(node);
+ if (!parent) {
+ /* The caller is responsible for inserting the node into the
+ * parent list if necessary. This function can also be used with
+ * a NULL parent, which doesn't necessarily indicate that it
+ * refers to the root scheduling element.
+ */
+ list_del_init(&node->entry);
+ }
return node;
}
@@ -426,6 +472,7 @@ __esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sch
goto err_alloc_node;
}
+ list_add_tail(&node->entry, &esw->qos.domain->nodes);
esw_qos_normalize_min_rate(esw, NULL, extack);
trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix);
@@ -498,6 +545,9 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
SCHED_NODE_TYPE_VPORTS_TSAR,
NULL))
esw->qos.node0 = ERR_PTR(-ENOMEM);
+ else
+ list_add_tail(&esw->qos.node0->entry,
+ &esw->qos.domain->nodes);
}
if (IS_ERR(esw->qos.node0)) {
err = PTR_ERR(esw->qos.node0);
@@ -555,6 +605,18 @@ static void esw_qos_put(struct mlx5_eswitch *esw)
esw_qos_destroy(esw);
}
+static void
+esw_qos_tc_arbiter_scheduling_teardown(struct mlx5_esw_sched_node *node,
+ struct netlink_ext_ack *extack)
+{}
+
+static int esw_qos_tc_arbiter_scheduling_setup(struct mlx5_esw_sched_node *node,
+ struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG_MOD(extack, "TC arbiter elements are not supported.");
+ return -EOPNOTSUPP;
+}
+
static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack)
{
struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
@@ -723,6 +785,195 @@ static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw
return err;
}
+static void
+esw_qos_switch_vport_tcs_to_vport(struct mlx5_esw_sched_node *tc_arbiter_node,
+ struct mlx5_esw_sched_node *node,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_sched_node *vports_tc_node, *vport_tc_node, *tmp;
+
+ vports_tc_node = list_first_entry(&tc_arbiter_node->children,
+ struct mlx5_esw_sched_node,
+ entry);
+
+ list_for_each_entry_safe(vport_tc_node, tmp, &vports_tc_node->children,
+ entry)
+ esw_qos_vport_update_parent(vport_tc_node->vport, node, extack);
+}
+
+static int esw_qos_switch_tc_arbiter_node_to_vports(
+ struct mlx5_esw_sched_node *tc_arbiter_node,
+ struct mlx5_esw_sched_node *node,
+ struct netlink_ext_ack *extack)
+{
+ u32 parent_tsar_ix = node->parent ?
+ node->parent->ix : node->esw->qos.root_tsar_ix;
+ int err;
+
+ err = esw_qos_create_node_sched_elem(node->esw->dev, parent_tsar_ix,
+ node->max_rate, node->bw_share,
+ &node->ix);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to create scheduling element for vports node when disabliing vports TC QoS");
+ return err;
+ }
+
+ node->type = SCHED_NODE_TYPE_VPORTS_TSAR;
+
+ /* Disable TC QoS for vports in the arbiter node. */
+ esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, extack);
+
+ return 0;
+}
+
+static int esw_qos_switch_vports_node_to_tc_arbiter(
+ struct mlx5_esw_sched_node *node,
+ struct mlx5_esw_sched_node *tc_arbiter_node,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_sched_node *vport_node, *tmp;
+ struct mlx5_vport *vport;
+ int err;
+
+ /* Enable TC QoS for each vport in the node. */
+ list_for_each_entry_safe(vport_node, tmp, &node->children, entry) {
+ vport = vport_node->vport;
+ err = esw_qos_vport_update_parent(vport, tc_arbiter_node,
+ extack);
+ if (err)
+ goto err_out;
+ }
+
+ /* Destroy the current vports node TSAR. */
+ err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ node->ix);
+ if (err)
+ goto err_out;
+
+ return 0;
+err_out:
+ /* Restore vports back into the node if an error occurs. */
+ esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, NULL);
+
+ return err;
+}
+
+static struct mlx5_esw_sched_node *
+esw_qos_move_node(struct mlx5_esw_sched_node *curr_node)
+{
+ struct mlx5_esw_sched_node *new_node;
+
+ new_node = __esw_qos_alloc_node(curr_node->esw, curr_node->ix,
+ curr_node->type, NULL);
+ if (!IS_ERR(new_node))
+ esw_qos_nodes_set_parent(&curr_node->children, new_node);
+
+ return new_node;
+}
+
+static int esw_qos_node_disable_tc_arbitration(struct mlx5_esw_sched_node *node,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_sched_node *curr_node;
+ int err;
+
+ if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
+ return 0;
+
+ /* Allocate a new rate node to hold the current state, which will allow
+ * for restoring the vports back to this node after disabling TC
+ * arbitration.
+ */
+ curr_node = esw_qos_move_node(node);
+ if (IS_ERR(curr_node)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed setting up vports node");
+ return PTR_ERR(curr_node);
+ }
+
+ /* Disable TC QoS for all vports, and assign them back to the node. */
+ err = esw_qos_switch_tc_arbiter_node_to_vports(curr_node, node, extack);
+ if (err)
+ goto err_out;
+
+ /* Clean up the TC arbiter node after disabling TC QoS for vports. */
+ esw_qos_tc_arbiter_scheduling_teardown(curr_node, extack);
+ goto out;
+err_out:
+ esw_qos_nodes_set_parent(&curr_node->children, node);
+out:
+ __esw_qos_free_node(curr_node);
+ return err;
+}
+
+static int esw_qos_node_enable_tc_arbitration(struct mlx5_esw_sched_node *node,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_sched_node *curr_node, *child;
+ int err, new_level, max_level;
+
+ if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
+ return 0;
+
+ /* Increase the hierarchy level by one to account for the additional
+ * vports TC scheduling node, and verify that the new level does not
+ * exceed the maximum allowed depth.
+ */
+ new_level = node->level + 1;
+ max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
+ if (new_level > max_level) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "TC arbitration on nodes is not supported beyond max scheduling depth");
+ return -EOPNOTSUPP;
+ }
+
+ /* Ensure the node does not contain non-leaf children before assigning
+ * TC bandwidth.
+ */
+ if (!list_empty(&node->children)) {
+ list_for_each_entry(child, &node->children, entry) {
+ if (!child->vport) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot configure TC bandwidth on a node with non-leaf children");
+ return -EOPNOTSUPP;
+ }
+ }
+ }
+
+ /* Allocate a new node that will store the information of the current
+ * node. This will be used later to restore the node if necessary.
+ */
+ curr_node = esw_qos_move_node(node);
+ if (IS_ERR(curr_node)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed setting up node TC QoS");
+ return PTR_ERR(curr_node);
+ }
+
+ /* Initialize the TC arbiter node for QoS management.
+ * This step prepares the node for handling Traffic Class arbitration.
+ */
+ err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
+ if (err)
+ goto err_setup;
+
+ /* Enable TC QoS for each vport within the current node. */
+ err = esw_qos_switch_vports_node_to_tc_arbiter(curr_node, node, extack);
+ if (err)
+ goto err_switch_vports;
+ goto out;
+
+err_switch_vports:
+ esw_qos_tc_arbiter_scheduling_teardown(node, NULL);
+ node->ix = curr_node->ix;
+ node->type = curr_node->type;
+err_setup:
+ esw_qos_nodes_set_parent(&curr_node->children, node);
+out:
+ __esw_qos_free_node(curr_node);
+ return err;
+}
+
static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
{
struct ethtool_link_ksettings lksettings;
@@ -848,6 +1099,31 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *
return 0;
}
+static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw,
+ u32 *tc_bw)
+{
+ int i, num_tcs = esw_qos_num_tcs(esw->dev);
+
+ for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) {
+ if (tc_bw[i])
+ return false;
+ }
+
+ return true;
+}
+
+static bool esw_qos_tc_bw_disabled(u32 *tc_bw)
+{
+ int i;
+
+ for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) {
+ if (tc_bw[i])
+ return false;
+ }
+
+ return true;
+}
+
int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
{
if (esw->qos.domain)
@@ -921,9 +1197,28 @@ int mlx5_esw_devlink_rate_node_tc_bw_set(struct devlink_rate *rate_node,
u32 *tc_bw,
struct netlink_ext_ack *extack)
{
- NL_SET_ERR_MSG_MOD(extack,
- "TC bandwidth shares are not supported on nodes");
- return -EOPNOTSUPP;
+ struct mlx5_esw_sched_node *node = priv;
+ struct mlx5_eswitch *esw = node->esw;
+ bool disable;
+ int err;
+
+ if (!esw_qos_validate_unsupported_tc_bw(esw, tc_bw)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "E-Switch traffic classes number is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ disable = esw_qos_tc_bw_disabled(tc_bw);
+ esw_qos_lock(esw);
+ if (disable) {
+ err = esw_qos_node_disable_tc_arbitration(node, extack);
+ goto unlock;
+ }
+
+ err = esw_qos_node_enable_tc_arbitration(node, extack);
+unlock:
+ esw_qos_unlock(esw);
+ return err;
}
int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,