Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en')
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c      |   39
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h      |   15
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/health.c       |  109
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/health.h       |   11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c      |  218
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h      |   27
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/port.c         |  253
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/port.h         |    8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c  |  270
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c  |  183
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c        | 1369
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h        |  180
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c       |  115
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h       |    3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h         |   10
15 files changed, 2618 insertions(+), 192 deletions(-)
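
As a quick orientation before the per-file diffs, here is a minimal usage sketch of the mapping API this series introduces in en/mapping.{c,h} (the header comment in mapping.h below documents the locking rules and the delayed-removal grace period). The tunnel_key payload and the caller function are hypothetical, added only for illustration; the mapping_* signatures are taken from the patch itself.

	#include <linux/err.h>
	#include <linux/types.h>
	#include "mapping.h"

	struct tunnel_key {			/* hypothetical payload */
		u32 tun_id;
		u16 zone;
	};

	static int tunnel_mapping_example(void)
	{
		struct tunnel_key key = { .tun_id = 42, .zone = 1 };
		struct tunnel_key found;
		struct mapping_ctx *ctx;
		u32 id;
		int err;

		/* ids are allocated from [1, 0xFFF]; with delayed removal a
		 * removed id keeps resolving for MAPPING_GRACE_PERIOD msecs,
		 * so packets already marked in hardware still map correctly.
		 */
		ctx = mapping_create(sizeof(key), 0xFFF, true);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		err = mapping_add(ctx, &key, &id);	/* refcounted; dedups equal keys */
		if (err)
			goto out;

		err = mapping_find(ctx, id, &found);	/* RCU lookup, copies data out */

		mapping_remove(ctx, id);		/* freed after the grace period */
	out:
		mapping_destroy(ctx);
		return err;
	}

Passing 0 as max_id would default the id range to UINT_MAX per the patch; the grace period exists to avoid a remove()/add() pair recycling an id that hardware-marked packets still carry.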
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c new file mode 100644 index 000000000000..f8b2de4b04be --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "en/devlink.h" + +int mlx5e_devlink_port_register(struct mlx5e_priv *priv) +{ + struct devlink *devlink = priv_to_devlink(priv->mdev); + + if (mlx5_core_is_pf(priv->mdev)) + devlink_port_attrs_set(&priv->dl_port, + DEVLINK_PORT_FLAVOUR_PHYSICAL, + PCI_FUNC(priv->mdev->pdev->devfn), + false, 0, + NULL, 0); + else + devlink_port_attrs_set(&priv->dl_port, + DEVLINK_PORT_FLAVOUR_VIRTUAL, + 0, false, 0, NULL, 0); + + return devlink_port_register(devlink, &priv->dl_port, 1); +} + +void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv) +{ + devlink_port_type_eth_set(&priv->dl_port, priv->netdev); +} + +void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv) +{ + devlink_port_unregister(&priv->dl_port); +} + +struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return &priv->dl_port; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h new file mode 100644 index 000000000000..83123a801adc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5E_EN_DEVLINK_H +#define __MLX5E_EN_DEVLINK_H + +#include <net/devlink.h> +#include "en.h" + +int mlx5e_devlink_port_register(struct mlx5e_priv *priv); +void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv); +void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv); +struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 3a975641f902..3a199a03d929 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -3,6 +3,7 @@ #include "health.h" #include "lib/eq.h" +#include "lib/mlx5.h" int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) { @@ -197,10 +198,114 @@ int mlx5e_health_report(struct mlx5e_priv *priv, struct devlink_health_reporter *reporter, char *err_str, struct mlx5e_err_ctx *err_ctx) { - netdev_err(priv->netdev, err_str); + netdev_err(priv->netdev, "%s\n", err_str); if (!reporter) - return err_ctx->recover(&err_ctx->ctx); + return err_ctx->recover(err_ctx->ctx); return devlink_health_report(reporter, err_str, err_ctx); } + +#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024 +static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg, + const void *value, u32 value_len) + +{ + u32 data_size; + u32 offset; + int err; + + for (offset = 0; offset < value_len; offset += data_size) { + data_size = value_len - offset; + if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE) + data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE; + err = devlink_fmsg_binary_put(fmsg, value + offset, data_size); + if (err) + break; + } + return err; +} + +int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct 
mlx5_rsc_dump_cmd *cmd; + struct page *page; + int cmd_err, err; + int end_err; + int size; + + if (IS_ERR_OR_NULL(mdev->rsc_dump)) + return -EOPNOTSUPP; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + err = devlink_fmsg_binary_pair_nest_start(fmsg, "data"); + if (err) + return err; + + cmd = mlx5_rsc_dump_cmd_create(mdev, key); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto free_page; + } + + do { + cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size); + if (cmd_err < 0) { + err = cmd_err; + goto destroy_cmd; + } + + err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size); + if (err) + goto destroy_cmd; + + } while (cmd_err > 0); + +destroy_cmd: + mlx5_rsc_dump_cmd_destroy(cmd); + end_err = devlink_fmsg_binary_pair_nest_end(fmsg); + if (end_err) + err = end_err; +free_page: + __free_page(page); + return err; +} + +int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + int queue_idx, char *lbl) +{ + struct mlx5_rsc_key key = {}; + int err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = queue_idx; + key.size = PAGE_SIZE; + key.num_of_obj1 = 1; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, lbl); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx); + if (err) + return err; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + return devlink_fmsg_obj_nest_end(fmsg); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index d3693fa547ac..38f97f79ef16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -5,13 +5,13 @@ #define __MLX5E_EN_HEALTH_H #include "en.h" +#include "diag/rsc_dump.h" #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND) static inline bool cqe_syndrome_needs_recover(u8 syndrome) { - return syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR || - syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR || + return syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR || syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR || syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR; } @@ -36,6 +36,7 @@ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); struct mlx5e_err_ctx { int (*recover)(void *ctx); + int (*dump)(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, void *ctx); void *ctx; }; @@ -48,6 +49,8 @@ int mlx5e_health_report(struct mlx5e_priv *priv, int mlx5e_health_create_reporters(struct mlx5e_priv *priv); void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv); void mlx5e_health_channels_update(struct mlx5e_priv *priv); - - +int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, + struct devlink_fmsg *fmsg); +int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + int queue_idx, char *lbl); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c new file mode 100644 index 000000000000..ea321e528749 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies */ + +#include <linux/jhash.h> +#include <linux/slab.h> +#include <linux/xarray.h> +#include <linux/hashtable.h> + +#include "mapping.h" + +#define 
MAPPING_GRACE_PERIOD 2000 + +struct mapping_ctx { + struct xarray xarray; + DECLARE_HASHTABLE(ht, 8); + struct mutex lock; /* Guards hashtable and xarray */ + unsigned long max_id; + size_t data_size; + bool delayed_removal; + struct delayed_work dwork; + struct list_head pending_list; + spinlock_t pending_list_lock; /* Guards pending list */ +}; + +struct mapping_item { + struct rcu_head rcu; + struct list_head list; + unsigned long timeout; + struct hlist_node node; + int cnt; + u32 id; + char data[]; +}; + +int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id) +{ + struct mapping_item *mi; + int err = -ENOMEM; + u32 hash_key; + + mutex_lock(&ctx->lock); + + hash_key = jhash(data, ctx->data_size, 0); + hash_for_each_possible(ctx->ht, mi, node, hash_key) { + if (!memcmp(data, mi->data, ctx->data_size)) + goto attach; + } + + mi = kzalloc(sizeof(*mi) + ctx->data_size, GFP_KERNEL); + if (!mi) + goto err_alloc; + + memcpy(mi->data, data, ctx->data_size); + hash_add(ctx->ht, &mi->node, hash_key); + + err = xa_alloc(&ctx->xarray, &mi->id, mi, XA_LIMIT(1, ctx->max_id), + GFP_KERNEL); + if (err) + goto err_assign; +attach: + ++mi->cnt; + *id = mi->id; + + mutex_unlock(&ctx->lock); + + return 0; + +err_assign: + hash_del(&mi->node); + kfree(mi); +err_alloc: + mutex_unlock(&ctx->lock); + + return err; +} + +static void mapping_remove_and_free(struct mapping_ctx *ctx, + struct mapping_item *mi) +{ + xa_erase(&ctx->xarray, mi->id); + kfree_rcu(mi, rcu); +} + +static void mapping_free_item(struct mapping_ctx *ctx, + struct mapping_item *mi) +{ + if (!ctx->delayed_removal) { + mapping_remove_and_free(ctx, mi); + return; + } + + mi->timeout = jiffies + msecs_to_jiffies(MAPPING_GRACE_PERIOD); + + spin_lock(&ctx->pending_list_lock); + list_add_tail(&mi->list, &ctx->pending_list); + spin_unlock(&ctx->pending_list_lock); + + schedule_delayed_work(&ctx->dwork, MAPPING_GRACE_PERIOD); +} + +int mapping_remove(struct mapping_ctx *ctx, u32 id) +{ + unsigned long index = id; + struct mapping_item *mi; + int err = -ENOENT; + + mutex_lock(&ctx->lock); + mi = xa_load(&ctx->xarray, index); + if (!mi) + goto out; + err = 0; + + if (--mi->cnt > 0) + goto out; + + hash_del(&mi->node); + mapping_free_item(ctx, mi); +out: + mutex_unlock(&ctx->lock); + + return err; +} + +int mapping_find(struct mapping_ctx *ctx, u32 id, void *data) +{ + unsigned long index = id; + struct mapping_item *mi; + int err = -ENOENT; + + rcu_read_lock(); + mi = xa_load(&ctx->xarray, index); + if (!mi) + goto err_find; + + memcpy(data, mi->data, ctx->data_size); + err = 0; + +err_find: + rcu_read_unlock(); + return err; +} + +static void +mapping_remove_and_free_list(struct mapping_ctx *ctx, struct list_head *list) +{ + struct mapping_item *mi; + + list_for_each_entry(mi, list, list) + mapping_remove_and_free(ctx, mi); +} + +static void mapping_work_handler(struct work_struct *work) +{ + unsigned long min_timeout = 0, now = jiffies; + struct mapping_item *mi, *next; + LIST_HEAD(pending_items); + struct mapping_ctx *ctx; + + ctx = container_of(work, struct mapping_ctx, dwork.work); + + spin_lock(&ctx->pending_list_lock); + list_for_each_entry_safe(mi, next, &ctx->pending_list, list) { + if (time_after(now, mi->timeout)) + list_move(&mi->list, &pending_items); + else if (!min_timeout || + time_before(mi->timeout, min_timeout)) + min_timeout = mi->timeout; + } + spin_unlock(&ctx->pending_list_lock); + + mapping_remove_and_free_list(ctx, &pending_items); + + if (min_timeout) + schedule_delayed_work(&ctx->dwork, abs(min_timeout - now)); +} + 
+static void mapping_flush_work(struct mapping_ctx *ctx) +{ + if (!ctx->delayed_removal) + return; + + cancel_delayed_work_sync(&ctx->dwork); + mapping_remove_and_free_list(ctx, &ctx->pending_list); +} + +struct mapping_ctx * +mapping_create(size_t data_size, u32 max_id, bool delayed_removal) +{ + struct mapping_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + ctx->max_id = max_id ? max_id : UINT_MAX; + ctx->data_size = data_size; + + if (delayed_removal) { + INIT_DELAYED_WORK(&ctx->dwork, mapping_work_handler); + INIT_LIST_HEAD(&ctx->pending_list); + spin_lock_init(&ctx->pending_list_lock); + ctx->delayed_removal = true; + } + + mutex_init(&ctx->lock); + xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1); + + return ctx; +} + +void mapping_destroy(struct mapping_ctx *ctx) +{ + mapping_flush_work(ctx); + xa_destroy(&ctx->xarray); + mutex_destroy(&ctx->lock); + + kfree(ctx); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h new file mode 100644 index 000000000000..285525cc5470 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies */ + +#ifndef __MLX5_MAPPING_H__ +#define __MLX5_MAPPING_H__ + +struct mapping_ctx; + +int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id); +int mapping_remove(struct mapping_ctx *ctx, u32 id); +int mapping_find(struct mapping_ctx *ctx, u32 id, void *data); + +/* mapping uses an xarray to map data to ids in add(), and for find(). + * For locking, it uses a internal xarray spin lock for add()/remove(), + * find() uses rcu_read_lock(). + * Choosing delayed_removal postpones the removal of a previously mapped + * id by MAPPING_GRACE_PERIOD milliseconds. + * This is to avoid races against hardware, where we mark the packet in + * hardware with a previous id, and quick remove() and add() reusing the same + * previous id. Then find() will get the new mapping instead of the old + * which was used to mark the packet. 
+ */ +struct mapping_ctx *mapping_create(size_t data_size, u32 max_id, + bool delayed_removal); +void mapping_destroy(struct mapping_ctx *ctx); + +#endif /* __MLX5_MAPPING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index fce6eccdcf8b..2c4a670c8ffd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -343,64 +343,76 @@ out: return err; } -static u32 fec_supported_speeds[] = { - 10000, - 40000, - 25000, - 50000, - 56000, - 100000 +enum mlx5e_fec_supported_link_mode { + MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G, + MLX5E_FEC_SUPPORTED_LINK_MODES_25G, + MLX5E_FEC_SUPPORTED_LINK_MODES_50G, + MLX5E_FEC_SUPPORTED_LINK_MODES_56G, + MLX5E_FEC_SUPPORTED_LINK_MODES_100G, + MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X, + MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X, + MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X, + MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X, + MLX5E_MAX_FEC_SUPPORTED_LINK_MODE, }; -#define MLX5E_FEC_SUPPORTED_SPEEDS ARRAY_SIZE(fec_supported_speeds) +#define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X + +#define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \ + do { \ + u16 *_policy = &(policy); \ + u32 *_buf = buf; \ + \ + if (write) \ + MLX5_SET(pplm_reg, _buf, fec_override_admin_##link, *_policy); \ + else \ + *_policy = MLX5_GET(pplm_reg, _buf, fec_override_admin_##link); \ + } while (0) + +#define MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(buf, policy, write, link) \ + do { \ + u16 *__policy = &(policy); \ + bool _write = (write); \ + \ + if (_write && *__policy) \ + *__policy = find_first_bit((u_long *)__policy, \ + sizeof(u16) * BITS_PER_BYTE);\ + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, *__policy, _write, link); \ + if (!_write && *__policy) \ + *__policy = 1 << *__policy; \ + } while (0) /* get/set FEC admin field for a given speed */ -static int mlx5e_fec_admin_field(u32 *pplm, - u8 *fec_policy, - bool write, - u32 speed) +static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, + enum mlx5e_fec_supported_link_mode link_mode) { - switch (speed) { - case 10000: - case 40000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_10g_40g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_10g_40g, *fec_policy); + switch (link_mode) { + case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 10g_40g); break; - case 25000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_25g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_25g, *fec_policy); + case MLX5E_FEC_SUPPORTED_LINK_MODES_25G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 25g); break; - case 50000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_50g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_50g, *fec_policy); + case MLX5E_FEC_SUPPORTED_LINK_MODES_50G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g); break; - case 56000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_56g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_56g, *fec_policy); + case MLX5E_FEC_SUPPORTED_LINK_MODES_56G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 56g); break; - case 100000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_100g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_100g, *fec_policy); + case 
MLX5E_FEC_SUPPORTED_LINK_MODES_100G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X: + MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 50g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X: + MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 100g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X: + MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 200g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: + MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 400g_8x); break; default: return -EINVAL; @@ -408,32 +420,40 @@ static int mlx5e_fec_admin_field(u32 *pplm, return 0; } +#define MLX5E_GET_FEC_OVERRIDE_CAP(buf, link) \ + MLX5_GET(pplm_reg, buf, fec_override_cap_##link) + /* returns FEC capabilities for a given speed */ -static int mlx5e_get_fec_cap_field(u32 *pplm, - u8 *fec_cap, - u32 speed) +static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap, + enum mlx5e_fec_supported_link_mode link_mode) { - switch (speed) { - case 10000: - case 40000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_10g_40g); + switch (link_mode) { + case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 10g_40g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_25G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 25g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_50G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_56G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 56g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_100G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g); break; - case 25000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_25g); + case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g_1x); break; - case 50000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_50g); + case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g_2x); break; - case 56000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_56g); + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_4x); break; - case 100000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_100g); + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_8x); break; default: return -EINVAL; @@ -441,13 +461,14 @@ static int mlx5e_get_fec_cap_field(u32 *pplm, return 0; } -int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps) +bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy) { + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(pplm_reg); - u32 current_fec_speed; int err; + int i; if (!MLX5_CAP_GEN(dev, pcam_reg)) return -EOPNOTSUPP; @@ -458,23 +479,30 @@ int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps) MLX5_SET(pplm_reg, in, local_port, 1); err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); if (err) - return err; + return false; - err = mlx5e_port_linkspeed(dev, ¤t_fec_speed); - if (err) - return err; + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + u16 fec_caps; - return mlx5e_get_fec_cap_field(out, fec_caps, current_fec_speed); + if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; + + 
mlx5e_get_fec_cap_field(out, &fec_caps, i); + if (fec_caps & fec_policy) + return true; + } + return false; } int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, - u8 *fec_configured_mode) + u16 *fec_configured_mode) { + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(pplm_reg); - u32 link_speed; int err; + int i; if (!MLX5_CAP_GEN(dev, pcam_reg)) return -EOPNOTSUPP; @@ -490,24 +518,28 @@ int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, *fec_mode_active = MLX5_GET(pplm_reg, out, fec_mode_active); if (!fec_configured_mode) - return 0; + goto out; - err = mlx5e_port_linkspeed(dev, &link_speed); - if (err) - return err; + *fec_configured_mode = 0; + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; - return mlx5e_fec_admin_field(out, fec_configured_mode, 0, link_speed); + mlx5e_fec_admin_field(out, fec_configured_mode, 0, i); + if (*fec_configured_mode != 0) + goto out; + } +out: + return 0; } -int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) { - u8 fec_policy_nofec = BIT(MLX5E_FEC_NOFEC); - bool fec_mode_not_supp_in_speed = false; + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(pplm_reg); - u8 fec_policy_auto = 0; - u8 fec_caps = 0; + u16 fec_policy_auto = 0; int err; int i; @@ -517,6 +549,9 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) if (!MLX5_CAP_PCAM_REG(dev, pplm)) return -EOPNOTSUPP; + if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane) + return -EOPNOTSUPP; + MLX5_SET(pplm_reg, in, local_port, 1); err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); if (err) @@ -524,25 +559,31 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) MLX5_SET(pplm_reg, out, local_port, 1); - for (i = 0; i < MLX5E_FEC_SUPPORTED_SPEEDS; i++) { - mlx5e_get_fec_cap_field(out, &fec_caps, fec_supported_speeds[i]); - /* policy supported for link speed, or policy is auto */ - if (fec_caps & fec_policy || fec_policy == fec_policy_auto) { - mlx5e_fec_admin_field(out, &fec_policy, 1, - fec_supported_speeds[i]); - } else { - /* turn off FEC if supported. 
Else, leave it the same */ - if (fec_caps & fec_policy_nofec) - mlx5e_fec_admin_field(out, &fec_policy_nofec, 1, - fec_supported_speeds[i]); - fec_mode_not_supp_in_speed = true; - } - } + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + u16 conf_fec = fec_policy; + u16 fec_caps = 0; + + if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; - if (fec_mode_not_supp_in_speed) - mlx5_core_dbg(dev, - "FEC policy 0x%x is not supported for some speeds", - fec_policy); + /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514 + * to link modes up to 25G per lane and to + * MLX5E_FEC_RS_544_514 in the new link modes based on + * 50 G per lane + */ + if (conf_fec == (1 << MLX5E_FEC_RS_528_514) && + i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE) + conf_fec = (1 << MLX5E_FEC_RS_544_514); + + mlx5e_get_fec_cap_field(out, &fec_caps, i); + + /* policy supported for link speed */ + if (fec_caps & conf_fec) + mlx5e_fec_admin_field(out, &conf_fec, 1, i); + else + /* set FEC to auto*/ + mlx5e_fec_admin_field(out, &fec_policy_auto, 1, i); + } return mlx5_core_access_reg(dev, out, sz, out, sz, MLX5_REG_PPLM, 0, 1); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index 4a7f4497692b..a2ddd446dd59 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -60,15 +60,17 @@ int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); -int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps); +bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy); int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, - u8 *fec_configured_mode); -int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy); + u16 *fec_configured_mode); +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy); enum { MLX5E_FEC_NOFEC, MLX5E_FEC_FIRECODE, MLX5E_FEC_RS_528_514, + MLX5E_FEC_RS_544_514 = 7, + MLX5E_FEC_LLRS_272_257_1 = 9, }; #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 6c72b592315b..c209579fc213 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -90,7 +90,7 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_reset_icosq_cc_pc(icosq); - mlx5e_free_rx_descs(rq); + mlx5e_free_rx_in_progress_descs(rq); clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); mlx5e_activate_icosq(icosq); mlx5e_activate_rq(rq); @@ -102,19 +102,6 @@ out: return err; } -void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) -{ - struct mlx5e_priv *priv = icosq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx = {}; - - err_ctx.ctx = icosq; - err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover; - sprintf(err_str, "ERR CQE on ICOSQ: 0x%x", icosq->sqn); - - mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); -} - static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) { struct net_device *dev = rq->netdev; @@ -171,19 +158,6 @@ out: return err; } -void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq) -{ - struct mlx5e_priv *priv = rq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx = {}; - - 
err_ctx.ctx = rq; - err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover; - sprintf(err_str, "ERR CQE on RQ: 0x%x", rq->rqn); - - mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); -} - static int mlx5e_rx_reporter_timeout_recover(void *ctx) { struct mlx5e_icosq *icosq; @@ -201,21 +175,6 @@ static int mlx5e_rx_reporter_timeout_recover(void *ctx) return err; } -void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) -{ - struct mlx5e_icosq *icosq = &rq->channel->icosq; - struct mlx5e_priv *priv = rq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx = {}; - - err_ctx.ctx = rq; - err_ctx.recover = mlx5e_rx_reporter_timeout_recover; - sprintf(err_str, "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n", - icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn); - - mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); -} - static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) { return err_ctx->recover(err_ctx->ctx); @@ -371,10 +330,235 @@ unlock: return err; } +static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_txqsq *icosq = ctx; + struct mlx5_rsc_key key = {}; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "ICOSQ"); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = icosq->sqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_SND_BUFF; + key.num_of_obj2 = MLX5_RSC_DUMP_ALL; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_reporter_named_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5_rsc_key key = {}; + struct mlx5e_rq *rq = ctx; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "RX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "RQ"); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = rq->rqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "receive_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_RCV_BUFF; + key.num_of_obj2 
= MLX5_RSC_DUMP_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_reporter_named_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) +{ + struct mlx5_rsc_key key = {}; + int i, err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "RX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); + if (err) + return err; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_rq *rq = &priv->channels.c[i]->rq; + + err = mlx5e_health_queue_dump(priv, fmsg, rq->rqn, "RQ"); + if (err) + return err; + } + + return devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_from_ctx(struct mlx5e_priv *priv, + struct mlx5e_err_ctx *err_ctx, + struct devlink_fmsg *fmsg) +{ + return err_ctx->dump(priv, fmsg, err_ctx->ctx); +} + +static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_rx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : + mlx5e_rx_reporter_dump_all_rqs(priv, fmsg); +} + +void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) +{ + struct mlx5e_icosq *icosq = &rq->channel->icosq; + struct mlx5e_priv *priv = rq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = rq; + err_ctx.recover = mlx5e_rx_reporter_timeout_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_rq; + snprintf(err_str, sizeof(err_str), + "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x", + icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq) +{ + struct mlx5e_priv *priv = rq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = rq; + err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_rq; + snprintf(err_str, sizeof(err_str), "ERR CQE on RQ: 0x%x", rq->rqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) +{ + struct mlx5e_priv *priv = icosq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = icosq; + err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_icosq; + snprintf(err_str, sizeof(err_str), "ERR CQE on ICOSQ: 0x%x", icosq->sqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { .name = "rx", .recover = mlx5e_rx_reporter_recover, .diagnose = mlx5e_rx_reporter_diagnose, + .dump = mlx5e_rx_reporter_dump, }; #define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 @@ -387,7 +571,7 @@ int mlx5e_reporter_rx_create(struct mlx5e_priv *priv) reporter = 
devlink_health_reporter_create(devlink, &mlx5_rx_reporter_ops, MLX5E_REPORTER_RX_GRACEFUL_PERIOD, - true, priv); + priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", PTR_ERR(reporter)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index b468549e96ff..9805fc085512 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -82,19 +82,6 @@ out: return err; } -void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) -{ - struct mlx5e_priv *priv = sq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx = {0}; - - err_ctx.ctx = sq; - err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; - sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn); - - mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); -} - static int mlx5e_tx_reporter_timeout_recover(void *ctx) { struct mlx5_eq_comp *eq; @@ -110,22 +97,6 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx) return err; } -int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) -{ - struct mlx5e_priv *priv = sq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx; - - err_ctx.ctx = sq; - err_ctx.recover = mlx5e_tx_reporter_timeout_recover; - sprintf(err_str, - "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", - sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, - jiffies_to_usecs(jiffies - sq->txq->trans_start)); - - return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); -} - /* state lock cannot be grabbed within this function. * It can cause a dead lock or a read-after-free. 
*/ @@ -275,10 +246,162 @@ unlock: return err; } +static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5_rsc_key key = {}; + struct mlx5e_txqsq *sq = ctx; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ"); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = sq->sqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_SND_BUFF; + key.num_of_obj2 = MLX5_RSC_DUMP_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_reporter_named_obj_nest_end(fmsg); +} + +static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) +{ + struct mlx5_rsc_key key = {}; + int i, tc, err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); + if (err) + return err; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + for (tc = 0; tc < priv->channels.params.num_tc; tc++) { + struct mlx5e_txqsq *sq = &c->sq[tc]; + + err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ"); + if (err) + return err; + } + } + return devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv, + struct mlx5e_err_ctx *err_ctx, + struct devlink_fmsg *fmsg) +{ + return err_ctx->dump(priv, fmsg, err_ctx->ctx); +} + +static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? 
mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : + mlx5e_tx_reporter_dump_all_sqs(priv, fmsg); +} + +void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) +{ + struct mlx5e_priv *priv = sq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = sq; + err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; + err_ctx.dump = mlx5e_tx_reporter_dump_sq; + snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn); + + mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); +} + +int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) +{ + struct mlx5e_priv *priv = sq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = sq; + err_ctx.recover = mlx5e_tx_reporter_timeout_recover; + err_ctx.dump = mlx5e_tx_reporter_dump_sq; + snprintf(err_str, sizeof(err_str), + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", + sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, + jiffies_to_usecs(jiffies - sq->txq->trans_start)); + + return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); +} + static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { .name = "tx", .recover = mlx5e_tx_reporter_recover, .diagnose = mlx5e_tx_reporter_diagnose, + .dump = mlx5e_tx_reporter_dump, }; #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 @@ -293,7 +416,7 @@ int mlx5e_reporter_tx_create(struct mlx5e_priv *priv) reporter = devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, MLX5_REPORTER_TX_GRACEFUL_PERIOD, - true, priv); + priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create tx reporter, err = %ld\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c new file mode 100644 index 000000000000..ad3e3a65d403 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -0,0 +1,1369 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_zones.h> +#include <net/netfilter/nf_conntrack_labels.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_acct.h> +#include <uapi/linux/tc_act/tc_pedit.h> +#include <net/tc_act/tc_ct.h> +#include <net/flow_offload.h> +#include <net/netfilter/nf_flow_table.h> +#include <linux/workqueue.h> + +#include "esw/chains.h" +#include "en/tc_ct.h" +#include "en.h" +#include "en_tc.h" +#include "en_rep.h" + +#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8) +#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0) +#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) +#define MLX5_CT_STATE_TRK_BIT BIT(2) + +#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8) +#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) +#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX + +#define ct_dbg(fmt, args...)\ + netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args) + +struct mlx5_tc_ct_priv { + struct mlx5_eswitch *esw; + const struct net_device *netdev; + struct idr fte_ids; + struct idr tuple_ids; + struct rhashtable zone_ht; + struct mlx5_flow_table *ct; + struct mlx5_flow_table *ct_nat; + struct mlx5_flow_table *post_ct; + struct mutex control_lock; /* guards parallel adds/dels */ +}; + +struct mlx5_ct_flow { + struct mlx5_esw_flow_attr pre_ct_attr; + struct mlx5_esw_flow_attr post_ct_attr; + struct mlx5_flow_handle *pre_ct_rule; + struct mlx5_flow_handle *post_ct_rule; + struct mlx5_ct_ft *ft; + u32 fte_id; + u32 chain_mapping; +}; + +struct mlx5_ct_zone_rule { + struct mlx5_flow_handle *rule; + struct mlx5_esw_flow_attr attr; + int tupleid; + bool nat; +}; + +struct mlx5_ct_ft { + struct rhash_head node; + u16 zone; + refcount_t refcount; + struct nf_flowtable *nf_ft; + struct mlx5_tc_ct_priv *ct_priv; + struct rhashtable ct_entries_ht; + struct list_head ct_entries_list; +}; + +struct mlx5_ct_entry { + struct list_head list; + u16 zone; + struct rhash_head node; + struct flow_rule *flow_rule; + struct mlx5_fc *counter; + unsigned long lastuse; + unsigned long cookie; + unsigned long restore_cookie; + struct mlx5_ct_zone_rule zone_rules[2]; +}; + +static const struct rhashtable_params cts_ht_params = { + .head_offset = offsetof(struct mlx5_ct_entry, node), + .key_offset = offsetof(struct mlx5_ct_entry, cookie), + .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie), + .automatic_shrinking = true, + .min_size = 16 * 1024, +}; + +static const struct rhashtable_params zone_params = { + .head_offset = offsetof(struct mlx5_ct_ft, node), + .key_offset = offsetof(struct mlx5_ct_ft, zone), + .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone), + .automatic_shrinking = true, +}; + +static struct mlx5_tc_ct_priv * +mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + return uplink_priv->ct_priv; +} + +static int +mlx5_tc_ct_set_tuple_match(struct mlx5_flow_spec *spec, + struct flow_rule *rule) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + u16 addr_type = 0; + u8 ip_proto = 0; + + if (flow_rule_match_key(rule, 
FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, + ntohs(match.mask->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ntohs(match.key->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + match.mask->ip_proto); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + match.key->ip_proto); + + ip_proto = match.key->ip_proto; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.key->src, sizeof(match.key->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.key->dst, sizeof(match.key->dst)); + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, sizeof(match.key->src)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, sizeof(match.key->dst)); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + switch (ip_proto) { + case IPPROTO_TCP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_dport, ntohs(match.key->dst)); + break; + + case IPPROTO_UDP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_dport, ntohs(match.key->dst)); + break; + default: + break; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match; + + flow_rule_match_tcp(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, + ntohs(match.mask->flags)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, + ntohs(match.key->flags)); + } + + return 0; +} + +static void +mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry, + bool nat) +{ + struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; + struct mlx5_esw_flow_attr *attr = &zone_rule->attr; + struct 
mlx5_eswitch *esw = ct_priv->esw; + + ct_dbg("Deleting ct entry rule in zone %d", entry->zone); + + mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr); + mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr); + idr_remove(&ct_priv->tuple_ids, zone_rule->tupleid); +} + +static void +mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry) +{ + mlx5_tc_ct_entry_del_rule(ct_priv, entry, true); + mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); + + mlx5_fc_destroy(ct_priv->esw->dev, entry->counter); +} + +static struct flow_action_entry * +mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule) +{ + struct flow_action *flow_action = &flow_rule->action; + struct flow_action_entry *act; + int i; + + flow_action_for_each(i, act, flow_action) { + if (act->id == FLOW_ACTION_CT_METADATA) + return act; + } + + return NULL; +} + +static int +mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts, + u8 ct_state, + u32 mark, + u32 label, + u32 tupleid) +{ + struct mlx5_eswitch *esw = ct_priv->esw; + int err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + CTSTATE_TO_REG, ct_state); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + MARK_TO_REG, mark); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + LABELS_TO_REG, label); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + TUPLEID_TO_REG, tupleid); + if (err) + return err; + + return 0; +} + +static int +mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, + char *modact) +{ + u32 offset = act->mangle.offset, field; + + switch (act->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + MLX5_SET(set_action_in, modact, length, 0); + if (offset == offsetof(struct iphdr, saddr)) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV4; + else if (offset == offsetof(struct iphdr, daddr)) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV4; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + MLX5_SET(set_action_in, modact, length, 0); + if (offset == offsetof(struct ipv6hdr, saddr)) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0; + else if (offset == offsetof(struct ipv6hdr, saddr) + 4) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, saddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64; + else if (offset == offsetof(struct ipv6hdr, saddr) + 12) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96; + else if (offset == offsetof(struct ipv6hdr, daddr)) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0; + else if (offset == offsetof(struct ipv6hdr, daddr) + 4) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, daddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64; + else if (offset == offsetof(struct ipv6hdr, daddr) + 12) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + MLX5_SET(set_action_in, modact, length, 16); + if (offset == offsetof(struct tcphdr, source)) + field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT; + else if (offset == offsetof(struct tcphdr, dest)) + field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + MLX5_SET(set_action_in, modact, length, 16); + if (offset == offsetof(struct udphdr, source)) + field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT; + else if (offset == offsetof(struct udphdr, dest)) + field 
= MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT; + else + return -EOPNOTSUPP; + break; + + default: + return -EOPNOTSUPP; + } + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, offset, 0); + MLX5_SET(set_action_in, modact, field, field); + MLX5_SET(set_action_in, modact, data, act->mangle.val); + + return 0; +} + +static int +mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + struct flow_action *flow_action = &flow_rule->action; + struct mlx5_core_dev *mdev = ct_priv->esw->dev; + struct flow_action_entry *act; + size_t action_size; + char *modact; + int err, i; + + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_MANGLE: { + err = alloc_mod_hdr_actions(mdev, + MLX5_FLOW_NAMESPACE_FDB, + mod_acts); + if (err) + return err; + + modact = mod_acts->actions + + mod_acts->num_actions * action_size; + + err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact); + if (err) + return err; + + mod_acts->num_actions++; + } + break; + + case FLOW_ACTION_CT_METADATA: + /* Handled earlier */ + continue; + default: + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int +mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_esw_flow_attr *attr, + struct flow_rule *flow_rule, + u32 tupleid, + bool nat) +{ + struct mlx5e_tc_mod_hdr_acts mod_acts = {}; + struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_modify_hdr *mod_hdr; + struct flow_action_entry *meta; + int err; + + meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); + if (!meta) + return -EOPNOTSUPP; + + if (meta->ct_metadata.labels[1] || + meta->ct_metadata.labels[2] || + meta->ct_metadata.labels[3]) { + ct_dbg("Failed to offload ct entry due to unsupported label"); + return -EOPNOTSUPP; + } + + if (nat) { + err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, + &mod_acts); + if (err) + goto err_mapping; + } + + err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, + (MLX5_CT_STATE_ESTABLISHED_BIT | + MLX5_CT_STATE_TRK_BIT), + meta->ct_metadata.mark, + meta->ct_metadata.labels[0], + tupleid); + if (err) + goto err_mapping; + + mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, + mod_acts.num_actions, + mod_acts.actions); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + goto err_mapping; + } + attr->modify_hdr = mod_hdr; + + dealloc_mod_hdr_actions(&mod_acts); + return 0; + +err_mapping: + dealloc_mod_hdr_actions(&mod_acts); + return err; +} + +static int +mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, + bool nat) +{ + struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; + struct mlx5_esw_flow_attr *attr = &zone_rule->attr; + struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_flow_spec *spec = NULL; + u32 tupleid = 1; + int err; + + zone_rule->nat = nat; + + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Get tuple unique id */ + err = idr_alloc_u32(&ct_priv->tuple_ids, zone_rule, &tupleid, + TUPLE_ID_MAX, GFP_KERNEL); + if (err) { + netdev_warn(ct_priv->netdev, + "Failed to allocate tuple id, err: %d\n", err); + goto err_idr_alloc; + } + zone_rule->tupleid = tupleid; + + err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, + tupleid, nat); + if (err) { + ct_dbg("Failed to create ct entry mod hdr"); + goto err_mod_hdr; + } + + 
attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->dest_chain = 0; + attr->dest_ft = ct_priv->post_ct; + attr->fdb = nat ? ct_priv->ct_nat : ct_priv->ct; + attr->outer_match_level = MLX5_MATCH_L4; + attr->counter = entry->counter; + attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + + mlx5_tc_ct_set_tuple_match(spec, flow_rule); + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + entry->zone & MLX5_CT_ZONE_MASK, + MLX5_CT_ZONE_MASK); + + zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + if (IS_ERR(zone_rule->rule)) { + err = PTR_ERR(zone_rule->rule); + ct_dbg("Failed to add ct entry rule, nat: %d", nat); + goto err_rule; + } + + kfree(spec); + ct_dbg("Offloaded ct entry rule in zone %d", entry->zone); + + return 0; + +err_rule: + mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr); +err_mod_hdr: + idr_remove(&ct_priv->tuple_ids, zone_rule->tupleid); +err_idr_alloc: + kfree(spec); + return err; +} + +static int +mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry) +{ + struct mlx5_eswitch *esw = ct_priv->esw; + int err; + + entry->counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(entry->counter)) { + err = PTR_ERR(entry->counter); + ct_dbg("Failed to create counter for ct entry"); + return err; + } + + err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false); + if (err) + goto err_orig; + + err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true); + if (err) + goto err_nat; + + return 0; + +err_nat: + mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); +err_orig: + mlx5_fc_destroy(esw->dev, entry->counter); + return err; +} + +static int +mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, + struct flow_cls_offload *flow) +{ + struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow); + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; + struct flow_action_entry *meta_action; + unsigned long cookie = flow->cookie; + struct mlx5_ct_entry *entry; + int err; + + meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule); + if (!meta_action) + return -EOPNOTSUPP; + + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, + cts_ht_params); + if (entry) + return 0; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->zone = ft->zone; + entry->flow_rule = flow_rule; + entry->cookie = flow->cookie; + entry->restore_cookie = meta_action->ct_metadata.cookie; + + err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry); + if (err) + goto err_rules; + + err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node, + cts_ht_params); + if (err) + goto err_insert; + + list_add(&entry->list, &ft->ct_entries_list); + + return 0; + +err_insert: + mlx5_tc_ct_entry_del_rules(ct_priv, entry); +err_rules: + kfree(entry); + netdev_warn(ct_priv->netdev, + "Failed to offload ct entry, err: %d\n", err); + return err; +} + +static int +mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, + struct flow_cls_offload *flow) +{ + unsigned long cookie = flow->cookie; + struct mlx5_ct_entry *entry; + + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, + cts_ht_params); + if (!entry) + return -ENOENT; + + mlx5_tc_ct_entry_del_rules(ft->ct_priv, entry); + WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht, + &entry->node, + cts_ht_params)); + list_del(&entry->list); + kfree(entry); + + return 0; +} + +static int +mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, + 
				    struct flow_cls_offload *f)
+{
+	unsigned long cookie = f->cookie;
+	struct mlx5_ct_entry *entry;
+	u64 lastuse, packets, bytes;
+
+	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
+				       cts_ht_params);
+	if (!entry)
+		return -ENOENT;
+
+	mlx5_fc_query_cached(entry->counter, &bytes, &packets, &lastuse);
+	flow_stats_update(&f->stats, bytes, packets, lastuse,
+			  FLOW_ACTION_HW_STATS_DELAYED);
+
+	return 0;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
+			      void *cb_priv)
+{
+	struct flow_cls_offload *f = type_data;
+	struct mlx5_ct_ft *ft = cb_priv;
+
+	if (type != TC_SETUP_CLSFLOWER)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case FLOW_CLS_REPLACE:
+		return mlx5_tc_ct_block_flow_offload_add(ft, f);
+	case FLOW_CLS_DESTROY:
+		return mlx5_tc_ct_block_flow_offload_del(ft, f);
+	case FLOW_CLS_STATS:
+		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
+	default:
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+int
+mlx5_tc_ct_parse_match(struct mlx5e_priv *priv,
+		       struct mlx5_flow_spec *spec,
+		       struct flow_cls_offload *f,
+		       struct netlink_ext_ack *extack)
+{
+	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
+	struct flow_dissector_key_ct *mask, *key;
+	bool trk, est, untrk, unest, new;
+	u32 ctstate = 0, ctstate_mask = 0;
+	u16 ct_state_on, ct_state_off;
+	u16 ct_state, ct_state_mask;
+	struct flow_match_ct match;
+
+	if (!flow_rule_match_key(f->rule, FLOW_DISSECTOR_KEY_CT))
+		return 0;
+
+	if (!ct_priv) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "offload of ct matching isn't available");
+		return -EOPNOTSUPP;
+	}
+
+	flow_rule_match_ct(f->rule, &match);
+
+	key = match.key;
+	mask = match.mask;
+
+	ct_state = key->ct_state;
+	ct_state_mask = mask->ct_state;
+
+	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
+			      TCA_FLOWER_KEY_CT_FLAGS_NEW)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "only ct_state trk, est and new are supported for offload");
+		return -EOPNOTSUPP;
+	}
+
+	if (mask->ct_labels[1] || mask->ct_labels[2] || mask->ct_labels[3]) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "only lower 32 bits of ct_labels are supported for offload");
+		return -EOPNOTSUPP;
+	}
+
+	ct_state_on = ct_state & ct_state_mask;
+	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
+	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
+	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+
+	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
+	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
+	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
+	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
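+
+	/* Only established flows are offloaded to the ct tables, so a
+	 * match on ct_state +new can never be satisfied in hardware
+	 * and is rejected below.
+	 */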
+	if (new) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "matching on ct_state +new isn't supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (mask->ct_zone)
+		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
+					    key->ct_zone, MLX5_CT_ZONE_MASK);
+	if (ctstate_mask)
+		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
+					    ctstate, ctstate_mask);
+	if (mask->ct_mark)
+		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
+					    key->ct_mark, mask->ct_mark);
+	if (mask->ct_labels[0])
+		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG,
+					    key->ct_labels[0],
+					    mask->ct_labels[0]);
+
+	return 0;
+}
+
+int
+mlx5_tc_ct_parse_action(struct mlx5e_priv *priv,
+			struct mlx5_esw_flow_attr *attr,
+			const struct flow_action_entry *act,
+			struct netlink_ext_ack *extack)
+{
+	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
+
+	if (!ct_priv) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "offload of ct action isn't available");
+		return -EOPNOTSUPP;
+	}
+
+	attr->ct_attr.zone = act->ct.zone;
+	attr->ct_attr.ct_action = act->ct.action;
+	attr->ct_attr.nf_ft = act->ct.flow_table;
+
+	return 0;
+}
+
+static struct mlx5_ct_ft *
+mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
+		     struct nf_flowtable *nf_ft)
+{
+	struct mlx5_ct_ft *ft;
+	int err;
+
+	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
+	if (ft) {
+		refcount_inc(&ft->refcount);
+		return ft;
+	}
+
+	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
+	if (!ft)
+		return ERR_PTR(-ENOMEM);
+
+	ft->zone = zone;
+	ft->nf_ft = nf_ft;
+	ft->ct_priv = ct_priv;
+	INIT_LIST_HEAD(&ft->ct_entries_list);
+	refcount_set(&ft->refcount, 1);
+
+	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
+	if (err)
+		goto err_init;
+
+	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
+				     zone_params);
+	if (err)
+		goto err_insert;
+
+	err = nf_flow_table_offload_add_cb(ft->nf_ft,
+					   mlx5_tc_ct_block_flow_offload, ft);
+	if (err)
+		goto err_add_cb;
+
+	return ft;
+
+err_add_cb:
+	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
+err_insert:
+	rhashtable_destroy(&ft->ct_entries_ht);
+err_init:
+	kfree(ft);
+	return ERR_PTR(err);
+}
+
+static void
+mlx5_tc_ct_flush_ft(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
+{
+	struct mlx5_ct_entry *entry, *tmp;
+
+	/* Delete the remaining entries' rules and free the entries
+	 * themselves; the flow table is going away, so no further
+	 * FLOW_CLS_DESTROY callbacks will arrive for them.
+	 */
+	list_for_each_entry_safe(entry, tmp, &ft->ct_entries_list, list) {
+		mlx5_tc_ct_entry_del_rules(ft->ct_priv, entry);
+		list_del(&entry->list);
+		kfree(entry);
+	}
+}
+
+static void
+mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
+{
+	if (!refcount_dec_and_test(&ft->refcount))
+		return;
+
+	nf_flow_table_offload_del_cb(ft->nf_ft,
+				     mlx5_tc_ct_block_flow_offload, ft);
+	mlx5_tc_ct_flush_ft(ct_priv, ft);
+	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
+	rhashtable_destroy(&ft->ct_entries_ht);
+	kfree(ft);
+}
+
+/* We translate the tc filter with CT action to the following HW model:
+ *
+ * +-------------------+      +--------------------+    +--------------+
+ * + pre_ct (tc chain) +----->+ CT (nat or no nat) +--->+ post_ct      +----->
+ * + original match    +  |   + tuple + zone match + |  + fte_id match +  |
+ * +-------------------+  |   +--------------------+ |  +--------------+  |
+ *                        v                          v                    v
+ *                       set chain miss mapping  set mark             original
+ *                       set fte_id              set label            filter
+ *                       set zone                set established      actions
+ *                       set tunnel_id           do nat (if needed)
+ *                       do decap
+ */
+static int
+__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
+			  struct mlx5e_tc_flow *flow,
+			  struct mlx5_flow_spec *orig_spec,
+			  struct mlx5_esw_flow_attr *attr,
+			  struct mlx5_flow_handle **flow_rule)
+{
+	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
+	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
+	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
+	struct mlx5_flow_spec *post_ct_spec = NULL;
+	struct mlx5_eswitch *esw = ct_priv->esw;
+	struct mlx5_esw_flow_attr *pre_ct_attr;
+	struct mlx5_modify_hdr *mod_hdr;
+	struct mlx5_flow_handle *rule;
+	struct mlx5_ct_flow *ct_flow;
+	int chain_mapping = 0, err;
+	struct mlx5_ct_ft *ft;
+	u32 fte_id = 1;
+
+	post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
+	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
+	if (!post_ct_spec || !ct_flow) {
+		kfree(post_ct_spec);
+		kfree(ct_flow);
+		return -ENOMEM;
+	}
+
+	/* Register for CT established events */
+	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
+				  attr->ct_attr.nf_ft);
+	if (IS_ERR(ft)) {
+		err = PTR_ERR(ft);
+		ct_dbg("Failed to register to ft callback");
+		goto err_ft;
+	}
+	ct_flow->ft = ft;
+
+	err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
+			    MLX5_FTE_ID_MAX, GFP_KERNEL);
+	if (err) {
+		netdev_warn(priv->netdev,
+			    "Failed to allocate fte id, err: %d\n", err);
+		goto err_idr;
+	}
+	ct_flow->fte_id = fte_id;
+
+	/* Base esw attributes of both rules on original rule attribute */
+	pre_ct_attr = &ct_flow->pre_ct_attr;
+	memcpy(pre_ct_attr, attr, sizeof(*attr));
+	memcpy(&ct_flow->post_ct_attr, attr, sizeof(*attr));
+
+	/* Modify the original rule's action to fwd and modify, leave decap */
+	pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
+	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+			       MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+	/* Write chain miss tag for miss in ct table as we
+	 * don't go through all prios of this chain as normal tc rules
+	 * miss.
+	 */
+	err = mlx5_esw_chains_get_chain_mapping(esw, attr->chain,
+						&chain_mapping);
+	if (err) {
+		ct_dbg("Failed to get chain register mapping for chain");
+		goto err_get_chain;
+	}
+	ct_flow->chain_mapping = chain_mapping;
+
+	err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
+					CHAIN_TO_REG, chain_mapping);
+	if (err) {
+		ct_dbg("Failed to set chain register mapping");
+		goto err_mapping;
+	}
+
+	err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, ZONE_TO_REG,
+					attr->ct_attr.zone &
+					MLX5_CT_ZONE_MASK);
+	if (err) {
+		ct_dbg("Failed to set zone register mapping");
+		goto err_mapping;
+	}
+
+	err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
+					FTEID_TO_REG, fte_id);
+	if (err) {
+		ct_dbg("Failed to set fte_id register mapping");
+		goto err_mapping;
+	}
+
+	/* If original flow is decap, we do it before going into ct table
+	 * so add a rewrite for the tunnel match_id.
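+	 * This keeps the tunnel id available in a register after the
+	 * outer headers have been decapsulated.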
+	 */
+	if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+	    attr->chain == 0) {
+		u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
+
+		err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
+						TUNNEL_TO_REG,
+						tun_id);
+		if (err) {
+			ct_dbg("Failed to set tunnel register mapping");
+			goto err_mapping;
+		}
+	}
+
+	mod_hdr = mlx5_modify_header_alloc(esw->dev,
+					   MLX5_FLOW_NAMESPACE_FDB,
+					   pre_mod_acts.num_actions,
+					   pre_mod_acts.actions);
+	if (IS_ERR(mod_hdr)) {
+		err = PTR_ERR(mod_hdr);
+		ct_dbg("Failed to create pre ct mod hdr");
+		goto err_mapping;
+	}
+	pre_ct_attr->modify_hdr = mod_hdr;
+
+	/* The post ct rule matches on fte_id and executes the original
+	 * rule's tc action.
+	 */
+	mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
+				    fte_id, MLX5_FTE_ID_MASK);
+
+	/* Put post_ct rule on post_ct fdb */
+	ct_flow->post_ct_attr.chain = 0;
+	ct_flow->post_ct_attr.prio = 0;
+	ct_flow->post_ct_attr.fdb = ct_priv->post_ct;
+
+	ct_flow->post_ct_attr.inner_match_level = MLX5_MATCH_NONE;
+	ct_flow->post_ct_attr.outer_match_level = MLX5_MATCH_NONE;
+	ct_flow->post_ct_attr.action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
+	rule = mlx5_eswitch_add_offloaded_rule(esw, post_ct_spec,
+					       &ct_flow->post_ct_attr);
+	ct_flow->post_ct_rule = rule;
+	if (IS_ERR(ct_flow->post_ct_rule)) {
+		err = PTR_ERR(ct_flow->post_ct_rule);
+		ct_dbg("Failed to add post ct rule");
+		goto err_insert_post_ct;
+	}
+
+	/* Change the original rule to point to the ct table */
+	pre_ct_attr->dest_chain = 0;
+	pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+	ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw,
+							       orig_spec,
+							       pre_ct_attr);
+	if (IS_ERR(ct_flow->pre_ct_rule)) {
+		err = PTR_ERR(ct_flow->pre_ct_rule);
+		ct_dbg("Failed to add pre ct rule");
+		goto err_insert_orig;
+	}
+
+	attr->ct_attr.ct_flow = ct_flow;
+	*flow_rule = ct_flow->post_ct_rule;
+	dealloc_mod_hdr_actions(&pre_mod_acts);
+	kfree(post_ct_spec);
+
+	return 0;
+
+err_insert_orig:
+	mlx5_eswitch_del_offloaded_rule(ct_priv->esw, ct_flow->post_ct_rule,
+					&ct_flow->post_ct_attr);
+err_insert_post_ct:
+	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
+err_mapping:
+	dealloc_mod_hdr_actions(&pre_mod_acts);
+	mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping);
+err_get_chain:
+	idr_remove(&ct_priv->fte_ids, fte_id);
+err_idr:
+	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
+err_ft:
+	kfree(post_ct_spec);
+	kfree(ct_flow);
+	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
+	return err;
+}
+
+static int
+__mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv,
+				struct mlx5e_tc_flow *flow,
+				struct mlx5_flow_spec *orig_spec,
+				struct mlx5_esw_flow_attr *attr,
+				struct mlx5e_tc_mod_hdr_acts *mod_acts,
+				struct mlx5_flow_handle **flow_rule)
+{
+	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
+	struct mlx5_eswitch *esw = ct_priv->esw;
+	struct mlx5_esw_flow_attr *pre_ct_attr;
+	struct mlx5_modify_hdr *mod_hdr;
+	struct mlx5_flow_handle *rule;
+	struct mlx5_ct_flow *ct_flow;
+	int err;
+
+	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
+	if (!ct_flow)
+		return -ENOMEM;
+
+	/* Base esw attributes on original rule attribute */
+	pre_ct_attr = &ct_flow->pre_ct_attr;
+	memcpy(pre_ct_attr, attr, sizeof(*attr));
+
+	err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
+	if (err) {
+		ct_dbg("Failed to set register for ct clear");
+		goto err_set_registers;
+	}
+
+	mod_hdr = mlx5_modify_header_alloc(esw->dev,
+					   MLX5_FLOW_NAMESPACE_FDB,
+					   mod_acts->num_actions,
+					   mod_acts->actions);
+	if (IS_ERR(mod_hdr)) {
+		err = PTR_ERR(mod_hdr);
+		ct_dbg("Failed to create ct clear mod hdr");
+		goto err_set_registers;
+	}
+
+	dealloc_mod_hdr_actions(mod_acts);
+	pre_ct_attr->modify_hdr = mod_hdr;
+	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+	rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		ct_dbg("Failed to add ct clear rule");
+		goto err_insert;
+	}
+
+	attr->ct_attr.ct_flow = ct_flow;
+	ct_flow->pre_ct_rule = rule;
+	*flow_rule = rule;
+
+	return 0;
+
+err_insert:
+	mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
+err_set_registers:
+	/* ct_flow was never attached to attr, so free it here */
+	kfree(ct_flow);
+	netdev_warn(priv->netdev,
+		    "Failed to offload ct clear flow, err %d\n", err);
+	return err;
+}
+
+struct mlx5_flow_handle *
+mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
+			struct mlx5e_tc_flow *flow,
+			struct mlx5_flow_spec *spec,
+			struct mlx5_esw_flow_attr *attr,
+			struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+	bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
+	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
+	struct mlx5_flow_handle *rule;
+	int err;
+
+	if (!ct_priv)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	mutex_lock(&ct_priv->control_lock);
+	if (clear_action)
+		err = __mlx5_tc_ct_flow_offload_clear(priv, flow, spec, attr,
+						      mod_hdr_acts, &rule);
+	else
+		err = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr,
+						&rule);
+	mutex_unlock(&ct_priv->control_lock);
+	if (err)
+		return ERR_PTR(err);
+
+	return rule;
+}
+
+static void
+__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
+			 struct mlx5_ct_flow *ct_flow)
+{
+	struct mlx5_esw_flow_attr *pre_ct_attr = &ct_flow->pre_ct_attr;
+	struct mlx5_eswitch *esw = ct_priv->esw;
+
+	mlx5_eswitch_del_offloaded_rule(esw, ct_flow->pre_ct_rule,
+					pre_ct_attr);
+	mlx5_modify_header_dealloc(esw->dev, pre_ct_attr->modify_hdr);
+
+	if (ct_flow->post_ct_rule) {
+		mlx5_eswitch_del_offloaded_rule(esw, ct_flow->post_ct_rule,
+						&ct_flow->post_ct_attr);
+		mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping);
+		idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
+		mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
+	}
+
+	kfree(ct_flow);
+}
+
+void
+mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow,
+		       struct mlx5_esw_flow_attr *attr)
+{
+	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
+	struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
+
+	/* We are called on error to clean up stuff from parsing
+	 * but we don't have anything for now
+	 */
+	if (!ct_flow)
+		return;
+
+	mutex_lock(&ct_priv->control_lock);
+	__mlx5_tc_ct_delete_flow(ct_priv, ct_flow);
+	mutex_unlock(&ct_priv->control_lock);
+}
+
+static int
+mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw,
+			      const char **err_msg)
+{
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	/* cannot restore chain ID on HW miss */
+
+	*err_msg = "tc skb extension missing";
+	return -EOPNOTSUPP;
+#endif
+
+	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
+		*err_msg = "firmware level support is missing";
+		return -EOPNOTSUPP;
+	}
+
+	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
+		/* vlan workaround should be avoided for multi chain rules.
+		 * This is just a sanity check as pop vlan action should
+		 * be supported by any FW that supports ignore_flow_level
+		 */
+
+		*err_msg = "firmware vlan actions support is missing";
+		return -EOPNOTSUPP;
+	}
+
+	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
+				    fdb_modify_header_fwd_to_table)) {
+		/* CT always writes to registers which are mod header actions.
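+		 * The CT rules must also forward the packet to the
+		 * next table (ct to post_ct) after the rewrite.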
+		 * Therefore, mod header and goto are required
+		 */
+
+		*err_msg = "firmware fwd and modify support is missing";
+		return -EOPNOTSUPP;
+	}
+
+	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
+		*err_msg = "register loopback isn't supported";
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static void
+mlx5_tc_ct_init_err(struct mlx5e_rep_priv *rpriv, const char *msg, int err)
+{
+	if (msg)
+		netdev_warn(rpriv->netdev,
+			    "tc ct offload not supported, %s, err: %d\n",
+			    msg, err);
+	else
+		netdev_warn(rpriv->netdev,
+			    "tc ct offload not supported, err: %d\n",
+			    err);
+}
+
+int
+mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv)
+{
+	struct mlx5_tc_ct_priv *ct_priv;
+	struct mlx5e_rep_priv *rpriv;
+	struct mlx5_eswitch *esw;
+	struct mlx5e_priv *priv;
+	const char *msg;
+	int err;
+
+	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
+	priv = netdev_priv(rpriv->netdev);
+	esw = priv->mdev->priv.eswitch;
+
+	err = mlx5_tc_ct_init_check_support(esw, &msg);
+	if (err) {
+		mlx5_tc_ct_init_err(rpriv, msg, err);
+		goto err_support;
+	}
+
+	ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
+	if (!ct_priv) {
+		mlx5_tc_ct_init_err(rpriv, NULL, -ENOMEM);
+		goto err_alloc;
+	}
+
+	ct_priv->esw = esw;
+	ct_priv->netdev = rpriv->netdev;
+	ct_priv->ct = mlx5_esw_chains_create_global_table(esw);
+	if (IS_ERR(ct_priv->ct)) {
+		err = PTR_ERR(ct_priv->ct);
+		mlx5_tc_ct_init_err(rpriv, "failed to create ct table", err);
+		goto err_ct_tbl;
+	}
+
+	ct_priv->ct_nat = mlx5_esw_chains_create_global_table(esw);
+	if (IS_ERR(ct_priv->ct_nat)) {
+		err = PTR_ERR(ct_priv->ct_nat);
+		mlx5_tc_ct_init_err(rpriv, "failed to create ct nat table",
+				    err);
+		goto err_ct_nat_tbl;
+	}
+
+	ct_priv->post_ct = mlx5_esw_chains_create_global_table(esw);
+	if (IS_ERR(ct_priv->post_ct)) {
+		err = PTR_ERR(ct_priv->post_ct);
+		mlx5_tc_ct_init_err(rpriv, "failed to create post ct table",
+				    err);
+		goto err_post_ct_tbl;
+	}
+
+	idr_init(&ct_priv->fte_ids);
+	idr_init(&ct_priv->tuple_ids);
+	mutex_init(&ct_priv->control_lock);
+	rhashtable_init(&ct_priv->zone_ht, &zone_params);
+
+	/* Done, set ct_priv to mark it initialized */
+	uplink_priv->ct_priv = ct_priv;
+
+	return 0;
+
+err_post_ct_tbl:
+	mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct_nat);
+err_ct_nat_tbl:
+	mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct);
+err_ct_tbl:
+	kfree(ct_priv);
+err_alloc:
+err_support:
+	/* Failing to init ct offload is not fatal: ct_priv stays NULL
+	 * and ct rules will fail with -EOPNOTSUPP instead.
+	 */
+	return 0;
+}
+
+void
+mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv)
+{
+	struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv;
+
+	if (!ct_priv)
+		return;
+
+	mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->post_ct);
+	mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct_nat);
+	mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct);
+
+	rhashtable_destroy(&ct_priv->zone_ht);
+	mutex_destroy(&ct_priv->control_lock);
+	idr_destroy(&ct_priv->tuple_ids);
+	idr_destroy(&ct_priv->fte_ids);
+	kfree(ct_priv);
+
+	uplink_priv->ct_priv = NULL;
+}
+
+bool
+mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv,
+			 struct sk_buff *skb, u32 tupleid)
+{
+	struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv;
+	struct mlx5_ct_zone_rule *zone_rule;
+	struct mlx5_ct_entry *entry;
+
+	if (!ct_priv || !tupleid)
+		return true;
+
+	zone_rule = idr_find(&ct_priv->tuple_ids, tupleid);
+	if (!zone_rule)
+		return false;
+
+	entry = container_of(zone_rule, struct mlx5_ct_entry,
+			     zone_rules[zone_rule->nat]);
+	tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
+
+	return true;
+}
diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h new file mode 100644 index 000000000000..091d305b633e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_CT_H__ +#define __MLX5_EN_TC_CT_H__ + +#include <net/pkt_cls.h> +#include <linux/mlx5/fs.h> +#include <net/tc_act/tc_ct.h> + +#include "en.h" + +struct mlx5_esw_flow_attr; +struct mlx5e_tc_mod_hdr_acts; +struct mlx5_rep_uplink_priv; +struct mlx5e_tc_flow; +struct mlx5e_priv; + +struct mlx5_ct_flow; + +struct nf_flowtable; + +struct mlx5_ct_attr { + u16 zone; + u16 ct_action; + struct mlx5_ct_flow *ct_flow; + struct nf_flowtable *nf_ft; +}; + +#define zone_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\ + .moffset = 0,\ + .mlen = 2,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_2) + 2,\ +} + +#define ctstate_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\ + .moffset = 2,\ + .mlen = 2,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_2),\ +} + +#define mark_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_3,\ + .moffset = 0,\ + .mlen = 4,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_3),\ +} + +#define labels_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_4,\ + .moffset = 0,\ + .mlen = 4,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_4),\ +} + +#define fteid_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5,\ + .moffset = 0,\ + .mlen = 4,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_5),\ +} + +#define tupleid_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\ + .moffset = 0,\ + .mlen = 3,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_1),\ +} + +#define TUPLE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[TUPLEID_TO_REG].mlen * 8) +#define TUPLE_ID_MAX GENMASK(TUPLE_ID_BITS - 1, 0) + +#if IS_ENABLED(CONFIG_MLX5_TC_CT) + +int +mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv); +void +mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv); + +int +mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct netlink_ext_ack *extack); +int +mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, + struct mlx5_esw_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack); + +struct mlx5_flow_handle * +mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void +mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *attr); + +bool +mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, + struct sk_buff *skb, u32 tupleid); + +#else /* CONFIG_MLX5_TC_CT */ + +static inline int +mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) +{ + return 0; +} + +static inline void +mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) +{ +} + +static inline int +mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct netlink_ext_ack *extack) +{ + if (!flow_rule_match_key(f->rule, 
				    FLOW_DISSECTOR_KEY_CT))
+		return 0;
+
+	NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled.");
+	netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n");
+	return -EOPNOTSUPP;
+}
+
+static inline int
+mlx5_tc_ct_parse_action(struct mlx5e_priv *priv,
+			struct mlx5_esw_flow_attr *attr,
+			const struct flow_action_entry *act,
+			struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled.");
+	netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n");
+	return -EOPNOTSUPP;
+}
+
+static inline struct mlx5_flow_handle *
+mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
+			struct mlx5e_tc_flow *flow,
+			struct mlx5_flow_spec *spec,
+			struct mlx5_esw_flow_attr *attr,
+			struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void
+mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv,
+		       struct mlx5e_tc_flow *flow,
+		       struct mlx5_esw_flow_attr *attr)
+{
+}
+
+static inline bool
+mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv,
+			 struct sk_buff *skb, u32 tupleid)
+{
+	if (!tupleid)
+		return true;
+
+	return false;
+}
+
+#endif /* !IS_ENABLED(CONFIG_MLX5_TC_CT) */
+#endif /* __MLX5_EN_TC_CT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index af4ebd2951b5..b45c3f46570b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -66,6 +66,9 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv,
 		      mlx5e_is_uplink_rep(netdev_priv(*out_dev))))
 		return -EOPNOTSUPP;
 
+	if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev)
+		return -EOPNOTSUPP;
+
 	return 0;
 }
 
@@ -469,10 +472,15 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
 		       struct mlx5e_priv *priv,
 		       struct mlx5_flow_spec *spec,
 		       struct flow_cls_offload *f,
-		       void *headers_c,
-		       void *headers_v, u8 *match_level)
+		       u8 *match_level)
 {
 	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+				       outer_headers);
+	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				       outer_headers);
+	struct netlink_ext_ack *extack = f->common.extack;
 	int err = 0;
 
 	if (!tunnel) {
@@ -499,6 +507,109 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
 		goto out;
 	}
 
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+		struct flow_match_control match;
+		u16 addr_type;
+
+		flow_rule_match_enc_control(rule, &match);
+		addr_type = match.key->addr_type;
+
+		/* The tunnel addr_type uses the same dissector key ids
+		 * as the non-tunnel one.
+		 */
+		if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+			struct flow_match_ipv4_addrs match;
+
+			flow_rule_match_enc_ipv4_addrs(rule, &match);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+				 src_ipv4_src_ipv6.ipv4_layout.ipv4,
+				 ntohl(match.mask->src));
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+				 src_ipv4_src_ipv6.ipv4_layout.ipv4,
+				 ntohl(match.key->src));
+
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+				 ntohl(match.mask->dst));
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+				 ntohl(match.key->dst));
+
+			MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+					 ethertype);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+				 ETH_P_IP);
+		} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+			struct flow_match_ipv6_addrs match;
+
+			
flow_rule_match_enc_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ETH_P_IPV6); + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; + + flow_rule_match_enc_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); + + if (match.mask->ttl && + !MLX5_CAP_ESW_FLOWTABLE_FDB + (priv->mdev, + ft_field_support.outer_ipv4_ttl)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on TTL is not supported"); + err = -EOPNOTSUPP; + goto out; + } + } + + /* Enforce DMAC when offloading incoming tunneled flows. + * Flow counters require a match on the DMAC. + */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), priv->netdev->dev_addr); + + /* let software handle IP fragments */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + + return 0; + out: return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 6f9a78c85ffd..1630f0ec3ad7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -76,8 +76,7 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, - void *headers_c, - void *headers_v, u8 *match_level); + u8 *match_level); int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 7c8796d9743f..f07b1399744e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -179,6 +179,16 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) } } +static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) +{ + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + mlx5_wq_ll_reset(&rq->mpwqe.wq); + rq->mpwqe.actual_wq_head = 0; + } else { + mlx5_wq_cyc_reset(&rq->wqe.wq); + } +} + /* SW parser related functions */ struct mlx5e_swp_spec { |