diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox')
114 files changed, 9208 insertions, 2847 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c index 64ed725aec28..73eae80e1cb7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/crdump.c +++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c @@ -38,8 +38,21 @@ #define CR_ENABLE_BIT_OFFSET 0xF3F04 #define MAX_NUM_OF_DUMPS_TO_STORE (8) -static const char *region_cr_space_str = "cr-space"; -static const char *region_fw_health_str = "fw-health"; +#define REGION_CR_SPACE "cr-space" +#define REGION_FW_HEALTH "fw-health" + +static const char * const region_cr_space_str = REGION_CR_SPACE; +static const char * const region_fw_health_str = REGION_FW_HEALTH; + +static const struct devlink_region_ops region_cr_space_ops = { + .name = REGION_CR_SPACE, + .destructor = &kvfree, +}; + +static const struct devlink_region_ops region_fw_health_ops = { + .name = REGION_FW_HEALTH, + .destructor = &kvfree, +}; /* Set to true in case cr enable bit was set to true before crdump */ static bool crdump_enbale_bit_set; @@ -99,7 +112,7 @@ static void mlx4_crdump_collect_crspace(struct mlx4_dev *dev, readl(cr_space + offset); err = devlink_region_snapshot_create(crdump->region_crspace, - crspace_data, id, &kvfree); + crspace_data, id); if (err) { kvfree(crspace_data); mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n", @@ -138,7 +151,7 @@ static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev, readl(health_buf_start + offset); err = devlink_region_snapshot_create(crdump->region_fw_health, - health_data, id, &kvfree); + health_data, id); if (err) { kvfree(health_data); mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n", @@ -159,6 +172,7 @@ int mlx4_crdump_collect(struct mlx4_dev *dev) struct pci_dev *pdev = dev->persist->pdev; unsigned long cr_res_size; u8 __iomem *cr_space; + int err; u32 id; if (!dev->caps.health_buffer_addrs) { @@ -179,15 +193,22 @@ int mlx4_crdump_collect(struct mlx4_dev *dev) return -ENODEV; } - crdump_enable_crspace_access(dev, 
cr_space); - /* Get the available snapshot ID for the dumps */ - id = devlink_region_snapshot_id_get(devlink); + err = devlink_region_snapshot_id_get(devlink, &id); + if (err) { + mlx4_err(dev, "crdump: devlink get snapshot id err %d\n", err); + return err; + } + + crdump_enable_crspace_access(dev, cr_space); /* Try to capture dumps */ mlx4_crdump_collect_crspace(dev, cr_space, id); mlx4_crdump_collect_fw_health(dev, cr_space, id); + /* Release reference on the snapshot id */ + devlink_region_snapshot_id_put(devlink, id); + crdump_disable_crspace_access(dev, cr_space); iounmap(cr_space); @@ -205,7 +226,7 @@ int mlx4_crdump_init(struct mlx4_dev *dev) /* Create cr-space region */ crdump->region_crspace = devlink_region_create(devlink, - region_cr_space_str, + &region_cr_space_ops, MAX_NUM_OF_DUMPS_TO_STORE, pci_resource_len(pdev, 0)); if (IS_ERR(crdump->region_crspace)) @@ -216,7 +237,7 @@ int mlx4_crdump_init(struct mlx4_dev *dev) /* Create fw-health region */ crdump->region_fw_health = devlink_region_create(devlink, - region_fw_health_str, + &region_fw_health_ops, MAX_NUM_OF_DUMPS_TO_STORE, HEALTH_BUFFER_SIZE); if (IS_ERR(crdump->region_fw_health)) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 8bf1f08fdee2..8a5ea2543670 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -2121,6 +2121,10 @@ static int mlx4_en_set_phys_id(struct net_device *dev, } const struct ethtool_ops mlx4_en_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ | + ETHTOOL_COALESCE_PKT_RATE_RX_USECS, .get_drvinfo = mlx4_en_get_drvinfo, .get_link_ksettings = mlx4_en_get_link_ksettings, .set_link_ksettings = mlx4_en_set_link_ksettings, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index a1f20b205299..312e0a1ad43d 
100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -78,6 +78,16 @@ config MLX5_ESWITCH Legacy SRIOV mode (L2 mac vlan steering based). Switchdev mode (eswitch offloads). +config MLX5_TC_CT + bool "MLX5 TC connection tracking offload support" + depends on MLX5_CORE_EN && NET_SWITCHDEV && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT + default y + help + Say Y here if you want to support offloading connection tracking rules + via tc ct action. + + If unsure, set to Y + config MLX5_CORE_EN_DCB bool "Data Center Bridging (DCB) Support" default y diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index d3e06cec8317..6d32915000fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -16,7 +16,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \ - diag/fw_tracer.o diag/crdump.o devlink.o + diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o # # Netdev basic @@ -25,7 +25,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/umem.o \ - en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o + en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o # # Netdev extra @@ -34,15 +34,16 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ - lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ + lib/geneve.o 
en/mapping.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ en/tc_tun_geneve.o diag/en_tc_tracepoint.o mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o +mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o # # Core extra # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ - ecpf.o rdma.o eswitch_offloads_chains.o + ecpf.o rdma.o esw/chains.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 50862275544e..1972ddd12704 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -193,7 +193,7 @@ bool mlx5_device_registered(struct mlx5_core_dev *dev) return found; } -int mlx5_register_device(struct mlx5_core_dev *dev) +void mlx5_register_device(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_interface *intf; @@ -203,8 +203,6 @@ int mlx5_register_device(struct mlx5_core_dev *dev) list_for_each_entry(intf, &intf_list, list) mlx5_add_device(intf, priv); mutex_unlock(&mlx5_intf_mutex); - - return 0; } void mlx5_unregister_device(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index ac108f1e5bd6..bdeb291f6b67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -90,7 +90,8 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, { struct mlx5_core_dev *dev = devlink_priv(devlink); - return mlx5_unload_one(dev, false); + mlx5_unload_one(dev, false); + return 0; } static int mlx5_devlink_reload_up(struct devlink *devlink, @@ -190,11 +191,6 @@ static int mlx5_devlink_fs_mode_get(struct devlink *devlink, u32 id, return 0; } -enum mlx5_devlink_param_id { - 
MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, - MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, -}; - static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, union devlink_param_value val, struct netlink_ext_ack *extack) @@ -210,14 +206,38 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, return 0; } +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + int group_num = val.vu32; + + if (group_num < 1 || group_num > 1024) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported group number, supported range is 1-1024"); + return -EOPNOTSUPP; + } + + return 0; +} +#endif + static const struct devlink_param mlx5_devlink_params[] = { - DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, BIT(DEVLINK_PARAM_CMODE_RUNTIME), mlx5_devlink_fs_mode_get, mlx5_devlink_fs_mode_set, mlx5_devlink_fs_mode_validate), DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_enable_roce_validate), +#ifdef CONFIG_MLX5_ESWITCH + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + "fdb_large_groups", DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, + mlx5_devlink_large_group_num_validate), +#endif }; static void mlx5_devlink_set_params_init_values(struct devlink *devlink) @@ -230,13 +250,20 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) else strcpy(value.vstr, "smfs"); devlink_param_driverinit_value_set(devlink, - MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, + MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, value); value.vbool = MLX5_CAP_GEN(dev, roce); devlink_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, value); + +#ifdef CONFIG_MLX5_ESWITCH + value.vu32 = 
ESW_OFFLOADS_DEFAULT_NUM_GROUPS; + devlink_param_driverinit_value_set(devlink, + MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + value); +#endif } int mlx5_devlink_register(struct devlink *devlink, struct device *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index d0ba03774ddf..f0de327a59be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -6,6 +6,12 @@ #include <net/devlink.h> +enum mlx5_devlink_param_id { + MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, + MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, +}; + struct devlink *mlx5_devlink_alloc(void); void mlx5_devlink_free(struct devlink *devlink); int mlx5_devlink_register(struct devlink *devlink, struct device *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 94d7b69a95c7..c9c9b479bda5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -188,7 +188,7 @@ static int mlx5_fw_tracer_create_mkey(struct mlx5_fw_tracer *tracer) MLX5_SET(create_mkey_in, in, translations_octword_actual_size, DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2)); - mtt = (u64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); for (i = 0 ; i < TRACER_BUFFER_PAGE_NUM ; i++) mtt[i] = cpu_to_be64(tracer->buff.dma + i * PAGE_SIZE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c new file mode 100644 index 000000000000..17ab7efe693d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "rsc_dump.h" +#include "lib/mlx5.h" + +#define MLX5_SGMT_TYPE(SGMT) MLX5_SGMT_TYPE_##SGMT +#define MLX5_SGMT_STR_ASSING(SGMT)[MLX5_SGMT_TYPE(SGMT)] = #SGMT +static const char *const mlx5_rsc_sgmt_name[] = { + MLX5_SGMT_STR_ASSING(HW_CQPC), + MLX5_SGMT_STR_ASSING(HW_SQPC), + MLX5_SGMT_STR_ASSING(HW_RQPC), + MLX5_SGMT_STR_ASSING(FULL_SRQC), + MLX5_SGMT_STR_ASSING(FULL_CQC), + MLX5_SGMT_STR_ASSING(FULL_EQC), + MLX5_SGMT_STR_ASSING(FULL_QPC), + MLX5_SGMT_STR_ASSING(SND_BUFF), + MLX5_SGMT_STR_ASSING(RCV_BUFF), + MLX5_SGMT_STR_ASSING(SRQ_BUFF), + MLX5_SGMT_STR_ASSING(CQ_BUFF), + MLX5_SGMT_STR_ASSING(EQ_BUFF), + MLX5_SGMT_STR_ASSING(SX_SLICE), + MLX5_SGMT_STR_ASSING(SX_SLICE_ALL), + MLX5_SGMT_STR_ASSING(RDB), + MLX5_SGMT_STR_ASSING(RX_SLICE_ALL), +}; + +struct mlx5_rsc_dump { + u32 pdn; + struct mlx5_core_mkey mkey; + u16 fw_segment_type[MLX5_SGMT_TYPE_NUM]; +}; + +struct mlx5_rsc_dump_cmd { + u64 mem_size; + u8 cmd[MLX5_ST_SZ_BYTES(resource_dump)]; +}; + +static int mlx5_rsc_dump_sgmt_get_by_name(char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mlx5_rsc_sgmt_name); i++) + if (!strcmp(name, mlx5_rsc_sgmt_name[i])) + return i; + + return -EINVAL; +} + +static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page) +{ + void *data = page_address(page); + enum mlx5_sgmt_type sgmt_idx; + int num_of_items; + char *sgmt_name; + void *member; + void *menu; + int i; + + menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu); + num_of_items = MLX5_GET(resource_dump_menu_segment, menu, num_of_records); + + for (i = 0; i < num_of_items; i++) { + member = MLX5_ADDR_OF(resource_dump_menu_segment, menu, record[i]); + sgmt_name = MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name); + sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name); + if (sgmt_idx == -EINVAL) + continue; + rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record, + member, segment_type); + } +} + +static int 
mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, + struct page *page) +{ + struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump; + struct device *ddev = &dev->pdev->dev; + u32 out_seq_num; + u32 in_seq_num; + dma_addr_t dma; + int err; + + dma = dma_map_page(ddev, page, 0, cmd->mem_size, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(ddev, dma))) + return -ENOMEM; + + in_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num); + MLX5_SET(resource_dump, cmd->cmd, mkey, rsc_dump->mkey.key); + MLX5_SET64(resource_dump, cmd->cmd, address, dma); + + err = mlx5_core_access_reg(dev, cmd->cmd, sizeof(cmd->cmd), cmd->cmd, + sizeof(cmd->cmd), MLX5_REG_RESOURCE_DUMP, 0, 1); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to access err %d\n", err); + goto out; + } + out_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num); + if (out_seq_num && (in_seq_num + 1 != out_seq_num)) + err = -EIO; +out: + dma_unmap_page(ddev, dma, cmd->mem_size, DMA_FROM_DEVICE); + return err; +} + +struct mlx5_rsc_dump_cmd *mlx5_rsc_dump_cmd_create(struct mlx5_core_dev *dev, + struct mlx5_rsc_key *key) +{ + struct mlx5_rsc_dump_cmd *cmd; + int sgmt_type; + + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return ERR_PTR(-EOPNOTSUPP); + + sgmt_type = dev->rsc_dump->fw_segment_type[key->rsc]; + if (!sgmt_type && key->rsc != MLX5_SGMT_TYPE_MENU) + return ERR_PTR(-EOPNOTSUPP); + + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) { + mlx5_core_err(dev, "Resource dump: Failed to allocate command\n"); + return ERR_PTR(-ENOMEM); + } + MLX5_SET(resource_dump, cmd->cmd, segment_type, sgmt_type); + MLX5_SET(resource_dump, cmd->cmd, index1, key->index1); + MLX5_SET(resource_dump, cmd->cmd, index2, key->index2); + MLX5_SET(resource_dump, cmd->cmd, num_of_obj1, key->num_of_obj1); + MLX5_SET(resource_dump, cmd->cmd, num_of_obj2, key->num_of_obj2); + MLX5_SET(resource_dump, cmd->cmd, size, key->size); + cmd->mem_size = key->size; + return cmd; +} + +void mlx5_rsc_dump_cmd_destroy(struct 
mlx5_rsc_dump_cmd *cmd) +{ + kfree(cmd); +} + +int mlx5_rsc_dump_next(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, + struct page *page, int *size) +{ + bool more_dump; + int err; + + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return -EOPNOTSUPP; + + err = mlx5_rsc_dump_trigger(dev, cmd, page); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to trigger dump, %d\n", err); + return err; + } + *size = MLX5_GET(resource_dump, cmd->cmd, size); + more_dump = MLX5_GET(resource_dump, cmd->cmd, more_dump); + + return more_dump; +} + +#define MLX5_RSC_DUMP_MENU_SEGMENT 0xffff +static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev) +{ + struct mlx5_rsc_dump_cmd *cmd = NULL; + struct mlx5_rsc_key key = {}; + struct page *page; + int size; + int err; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + key.rsc = MLX5_SGMT_TYPE_MENU; + key.size = PAGE_SIZE; + cmd = mlx5_rsc_dump_cmd_create(dev, &key); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto free_page; + } + MLX5_SET(resource_dump, cmd->cmd, segment_type, MLX5_RSC_DUMP_MENU_SEGMENT); + + do { + err = mlx5_rsc_dump_next(dev, cmd, page, &size); + if (err < 0) + goto destroy_cmd; + + mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page); + + } while (err > 0); + +destroy_cmd: + mlx5_rsc_dump_cmd_destroy(cmd); +free_page: + __free_page(page); + + return err; +} + +static int mlx5_rsc_dump_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, + struct mlx5_core_mkey *mkey) +{ + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); + + kvfree(in); + return err; +} + 
+struct mlx5_rsc_dump *mlx5_rsc_dump_create(struct mlx5_core_dev *dev) +{ + struct mlx5_rsc_dump *rsc_dump; + + if (!MLX5_CAP_DEBUG(dev, resource_dump)) { + mlx5_core_dbg(dev, "Resource dump: capability not present\n"); + return NULL; + } + rsc_dump = kzalloc(sizeof(*rsc_dump), GFP_KERNEL); + if (!rsc_dump) + return ERR_PTR(-ENOMEM); + + return rsc_dump; +} + +void mlx5_rsc_dump_destroy(struct mlx5_core_dev *dev) +{ + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return; + kfree(dev->rsc_dump); +} + +int mlx5_rsc_dump_init(struct mlx5_core_dev *dev) +{ + struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump; + int err; + + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return 0; + + err = mlx5_core_alloc_pd(dev, &rsc_dump->pdn); + if (err) { + mlx5_core_warn(dev, "Resource dump: Failed to allocate PD %d\n", err); + return err; + } + err = mlx5_rsc_dump_create_mkey(dev, rsc_dump->pdn, &rsc_dump->mkey); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to create mkey, %d\n", err); + goto free_pd; + } + err = mlx5_rsc_dump_menu(dev); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to read menu, %d\n", err); + goto destroy_mkey; + } + return err; + +destroy_mkey: + mlx5_core_destroy_mkey(dev, &rsc_dump->mkey); +free_pd: + mlx5_core_dealloc_pd(dev, rsc_dump->pdn); + return err; +} + +void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev) +{ + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return; + + mlx5_core_destroy_mkey(dev, &dev->rsc_dump->mkey); + mlx5_core_dealloc_pd(dev, dev->rsc_dump->pdn); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h new file mode 100644 index 000000000000..148270073e71 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_RSC_DUMP_H +#define __MLX5_RSC_DUMP_H + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +enum mlx5_sgmt_type { + MLX5_SGMT_TYPE_HW_CQPC, + MLX5_SGMT_TYPE_HW_SQPC, + MLX5_SGMT_TYPE_HW_RQPC, + MLX5_SGMT_TYPE_FULL_SRQC, + MLX5_SGMT_TYPE_FULL_CQC, + MLX5_SGMT_TYPE_FULL_EQC, + MLX5_SGMT_TYPE_FULL_QPC, + MLX5_SGMT_TYPE_SND_BUFF, + MLX5_SGMT_TYPE_RCV_BUFF, + MLX5_SGMT_TYPE_SRQ_BUFF, + MLX5_SGMT_TYPE_CQ_BUFF, + MLX5_SGMT_TYPE_EQ_BUFF, + MLX5_SGMT_TYPE_SX_SLICE, + MLX5_SGMT_TYPE_SX_SLICE_ALL, + MLX5_SGMT_TYPE_RDB, + MLX5_SGMT_TYPE_RX_SLICE_ALL, + MLX5_SGMT_TYPE_MENU, + MLX5_SGMT_TYPE_TERMINATE, + + MLX5_SGMT_TYPE_NUM, /* Keep last */ +}; + +struct mlx5_rsc_key { + enum mlx5_sgmt_type rsc; + int index1; + int index2; + int num_of_obj1; + int num_of_obj2; + int size; +}; + +#define MLX5_RSC_DUMP_ALL 0xFFFF +struct mlx5_rsc_dump_cmd; +struct mlx5_rsc_dump; + +struct mlx5_rsc_dump *mlx5_rsc_dump_create(struct mlx5_core_dev *dev); +void mlx5_rsc_dump_destroy(struct mlx5_core_dev *dev); + +int mlx5_rsc_dump_init(struct mlx5_core_dev *dev); +void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev); + +struct mlx5_rsc_dump_cmd *mlx5_rsc_dump_cmd_create(struct mlx5_core_dev *dev, + struct mlx5_rsc_key *key); +void mlx5_rsc_dump_cmd_destroy(struct mlx5_rsc_dump_cmd *cmd); + +int mlx5_rsc_dump_next(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, + struct page *page, int *size); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index c9606b8ab6ef..12a61bf82c14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -204,7 +204,7 @@ struct mlx5e_tx_wqe { struct mlx5e_rx_wqe_ll { struct mlx5_wqe_srq_next_seg next; - struct mlx5_wqe_data_seg data[0]; + struct mlx5_wqe_data_seg data[]; }; struct mlx5e_rx_wqe_cyc { @@ -738,7 +738,6 @@ struct mlx5e_channel { DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int cpu; - 
cpumask_var_t xps_cpumask; }; struct mlx5e_channels { @@ -814,6 +813,15 @@ struct mlx5e_xsk { bool ever_used; }; +/* Temporary storage for variables that are allocated when struct mlx5e_priv is + * initialized, and used where we can't allocate them because that functions + * must not fail. Use with care and make sure the same variable is not used + * simultaneously by multiple users. + */ +struct mlx5e_scratchpad { + cpumask_var_t cpumask; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -873,10 +881,12 @@ struct mlx5e_priv { #endif struct devlink_health_reporter *tx_reporter; struct devlink_health_reporter *rx_reporter; + struct devlink_port dl_port; struct mlx5e_xsk xsk; #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) struct mlx5e_hv_vhca_stats_agent stats_agent; #endif + struct mlx5e_scratchpad scratchpad; }; struct mlx5e_profile { @@ -1036,14 +1046,22 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs); void mlx5e_close_channels(struct mlx5e_channels *chs); -/* Function pointer to be used to modify WH settings while +/* Function pointer to be used to modify HW or kernel settings while * switching channels */ -typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv); +typedef int (*mlx5e_fp_preactivate)(struct mlx5e_priv *priv, void *context); +#define MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(fn) \ +int fn##_ctx(struct mlx5e_priv *priv, void *context) \ +{ \ + return fn(priv); \ +} int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv); int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, struct mlx5e_channels *new_chs, - mlx5e_fp_hw_modify hw_modify); + mlx5e_fp_preactivate preactivate, + void *context); +int mlx5e_num_channels_changed(struct mlx5e_priv *priv); +int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv 
*priv); @@ -1124,10 +1142,10 @@ void mlx5e_update_ndo_stats(struct mlx5e_priv *priv); void mlx5e_queue_update_stats(struct mlx5e_priv *priv); int mlx5e_bits_invert(unsigned long a, int size); -typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv); int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv); +int mlx5e_set_dev_port_mtu_ctx(struct mlx5e_priv *priv, void *context); int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, - change_hw_mtu_cb set_mtu_cb); + mlx5e_fp_preactivate preactivate); /* ethtool helpers */ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, @@ -1153,6 +1171,12 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, struct ethtool_link_ksettings *link_ksettings); int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, const struct ethtool_link_ksettings *link_ksettings); +int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc); +int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, + const u8 hfunc); +int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, + u32 *rule_locs); +int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd); u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c new file mode 100644 index 000000000000..f8b2de4b04be --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. 
*/ + +#include "en/devlink.h" + +int mlx5e_devlink_port_register(struct mlx5e_priv *priv) +{ + struct devlink *devlink = priv_to_devlink(priv->mdev); + + if (mlx5_core_is_pf(priv->mdev)) + devlink_port_attrs_set(&priv->dl_port, + DEVLINK_PORT_FLAVOUR_PHYSICAL, + PCI_FUNC(priv->mdev->pdev->devfn), + false, 0, + NULL, 0); + else + devlink_port_attrs_set(&priv->dl_port, + DEVLINK_PORT_FLAVOUR_VIRTUAL, + 0, false, 0, NULL, 0); + + return devlink_port_register(devlink, &priv->dl_port, 1); +} + +void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv) +{ + devlink_port_type_eth_set(&priv->dl_port, priv->netdev); +} + +void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv) +{ + devlink_port_unregister(&priv->dl_port); +} + +struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return &priv->dl_port; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h new file mode 100644 index 000000000000..83123a801adc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. 
*/ + +#ifndef __MLX5E_EN_DEVLINK_H +#define __MLX5E_EN_DEVLINK_H + +#include <net/devlink.h> +#include "en.h" + +int mlx5e_devlink_port_register(struct mlx5e_priv *priv); +void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv); +void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv); +struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 20b907dc1e29..3a199a03d929 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -3,6 +3,7 @@ #include "health.h" #include "lib/eq.h" +#include "lib/mlx5.h" int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) { @@ -197,10 +198,114 @@ int mlx5e_health_report(struct mlx5e_priv *priv, struct devlink_health_reporter *reporter, char *err_str, struct mlx5e_err_ctx *err_ctx) { - netdev_err(priv->netdev, err_str); + netdev_err(priv->netdev, "%s\n", err_str); if (!reporter) return err_ctx->recover(err_ctx->ctx); return devlink_health_report(reporter, err_str, err_ctx); } + +#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024 +static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg, + const void *value, u32 value_len) + +{ + u32 data_size; + u32 offset; + int err; + + for (offset = 0; offset < value_len; offset += data_size) { + data_size = value_len - offset; + if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE) + data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE; + err = devlink_fmsg_binary_put(fmsg, value + offset, data_size); + if (err) + break; + } + return err; +} + +int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_rsc_dump_cmd *cmd; + struct page *page; + int cmd_err, err; + int end_err; + int size; + + if (IS_ERR_OR_NULL(mdev->rsc_dump)) + return -EOPNOTSUPP; + + page = 
alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + err = devlink_fmsg_binary_pair_nest_start(fmsg, "data"); + if (err) + return err; + + cmd = mlx5_rsc_dump_cmd_create(mdev, key); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto free_page; + } + + do { + cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size); + if (cmd_err < 0) { + err = cmd_err; + goto destroy_cmd; + } + + err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size); + if (err) + goto destroy_cmd; + + } while (cmd_err > 0); + +destroy_cmd: + mlx5_rsc_dump_cmd_destroy(cmd); + end_err = devlink_fmsg_binary_pair_nest_end(fmsg); + if (end_err) + err = end_err; +free_page: + __free_page(page); + return err; +} + +int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + int queue_idx, char *lbl) +{ + struct mlx5_rsc_key key = {}; + int err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = queue_idx; + key.size = PAGE_SIZE; + key.num_of_obj1 = 1; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, lbl); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx); + if (err) + return err; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + return devlink_fmsg_obj_nest_end(fmsg); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index e54f70d9af22..38f97f79ef16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -5,6 +5,7 @@ #define __MLX5E_EN_HEALTH_H #include "en.h" +#include "diag/rsc_dump.h" #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND) @@ -35,6 +36,7 @@ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); struct mlx5e_err_ctx { int (*recover)(void *ctx); + int (*dump)(struct mlx5e_priv 
*priv, struct devlink_fmsg *fmsg, void *ctx); void *ctx; }; @@ -47,6 +49,8 @@ int mlx5e_health_report(struct mlx5e_priv *priv, int mlx5e_health_create_reporters(struct mlx5e_priv *priv); void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv); void mlx5e_health_channels_update(struct mlx5e_priv *priv); - - +int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, + struct devlink_fmsg *fmsg); +int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + int queue_idx, char *lbl); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c new file mode 100644 index 000000000000..ea321e528749 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies */ + +#include <linux/jhash.h> +#include <linux/slab.h> +#include <linux/xarray.h> +#include <linux/hashtable.h> + +#include "mapping.h" + +#define MAPPING_GRACE_PERIOD 2000 + +struct mapping_ctx { + struct xarray xarray; + DECLARE_HASHTABLE(ht, 8); + struct mutex lock; /* Guards hashtable and xarray */ + unsigned long max_id; + size_t data_size; + bool delayed_removal; + struct delayed_work dwork; + struct list_head pending_list; + spinlock_t pending_list_lock; /* Guards pending list */ +}; + +struct mapping_item { + struct rcu_head rcu; + struct list_head list; + unsigned long timeout; + struct hlist_node node; + int cnt; + u32 id; + char data[]; +}; + +int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id) +{ + struct mapping_item *mi; + int err = -ENOMEM; + u32 hash_key; + + mutex_lock(&ctx->lock); + + hash_key = jhash(data, ctx->data_size, 0); + hash_for_each_possible(ctx->ht, mi, node, hash_key) { + if (!memcmp(data, mi->data, ctx->data_size)) + goto attach; + } + + mi = kzalloc(sizeof(*mi) + ctx->data_size, GFP_KERNEL); + if (!mi) + goto err_alloc; + + 
memcpy(mi->data, data, ctx->data_size); + hash_add(ctx->ht, &mi->node, hash_key); + + err = xa_alloc(&ctx->xarray, &mi->id, mi, XA_LIMIT(1, ctx->max_id), + GFP_KERNEL); + if (err) + goto err_assign; +attach: + ++mi->cnt; + *id = mi->id; + + mutex_unlock(&ctx->lock); + + return 0; + +err_assign: + hash_del(&mi->node); + kfree(mi); +err_alloc: + mutex_unlock(&ctx->lock); + + return err; +} + +static void mapping_remove_and_free(struct mapping_ctx *ctx, + struct mapping_item *mi) +{ + xa_erase(&ctx->xarray, mi->id); + kfree_rcu(mi, rcu); +} + +static void mapping_free_item(struct mapping_ctx *ctx, + struct mapping_item *mi) +{ + if (!ctx->delayed_removal) { + mapping_remove_and_free(ctx, mi); + return; + } + + mi->timeout = jiffies + msecs_to_jiffies(MAPPING_GRACE_PERIOD); + + spin_lock(&ctx->pending_list_lock); + list_add_tail(&mi->list, &ctx->pending_list); + spin_unlock(&ctx->pending_list_lock); + + schedule_delayed_work(&ctx->dwork, MAPPING_GRACE_PERIOD); +} + +int mapping_remove(struct mapping_ctx *ctx, u32 id) +{ + unsigned long index = id; + struct mapping_item *mi; + int err = -ENOENT; + + mutex_lock(&ctx->lock); + mi = xa_load(&ctx->xarray, index); + if (!mi) + goto out; + err = 0; + + if (--mi->cnt > 0) + goto out; + + hash_del(&mi->node); + mapping_free_item(ctx, mi); +out: + mutex_unlock(&ctx->lock); + + return err; +} + +int mapping_find(struct mapping_ctx *ctx, u32 id, void *data) +{ + unsigned long index = id; + struct mapping_item *mi; + int err = -ENOENT; + + rcu_read_lock(); + mi = xa_load(&ctx->xarray, index); + if (!mi) + goto err_find; + + memcpy(data, mi->data, ctx->data_size); + err = 0; + +err_find: + rcu_read_unlock(); + return err; +} + +static void +mapping_remove_and_free_list(struct mapping_ctx *ctx, struct list_head *list) +{ + struct mapping_item *mi; + + list_for_each_entry(mi, list, list) + mapping_remove_and_free(ctx, mi); +} + +static void mapping_work_handler(struct work_struct *work) +{ + unsigned long min_timeout = 0, now = 
jiffies; + struct mapping_item *mi, *next; + LIST_HEAD(pending_items); + struct mapping_ctx *ctx; + + ctx = container_of(work, struct mapping_ctx, dwork.work); + + spin_lock(&ctx->pending_list_lock); + list_for_each_entry_safe(mi, next, &ctx->pending_list, list) { + if (time_after(now, mi->timeout)) + list_move(&mi->list, &pending_items); + else if (!min_timeout || + time_before(mi->timeout, min_timeout)) + min_timeout = mi->timeout; + } + spin_unlock(&ctx->pending_list_lock); + + mapping_remove_and_free_list(ctx, &pending_items); + + if (min_timeout) + schedule_delayed_work(&ctx->dwork, abs(min_timeout - now)); +} + +static void mapping_flush_work(struct mapping_ctx *ctx) +{ + if (!ctx->delayed_removal) + return; + + cancel_delayed_work_sync(&ctx->dwork); + mapping_remove_and_free_list(ctx, &ctx->pending_list); +} + +struct mapping_ctx * +mapping_create(size_t data_size, u32 max_id, bool delayed_removal) +{ + struct mapping_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + ctx->max_id = max_id ? 
max_id : UINT_MAX; + ctx->data_size = data_size; + + if (delayed_removal) { + INIT_DELAYED_WORK(&ctx->dwork, mapping_work_handler); + INIT_LIST_HEAD(&ctx->pending_list); + spin_lock_init(&ctx->pending_list_lock); + ctx->delayed_removal = true; + } + + mutex_init(&ctx->lock); + xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1); + + return ctx; +} + +void mapping_destroy(struct mapping_ctx *ctx) +{ + mapping_flush_work(ctx); + xa_destroy(&ctx->xarray); + mutex_destroy(&ctx->lock); + + kfree(ctx); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h new file mode 100644 index 000000000000..285525cc5470 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies */ + +#ifndef __MLX5_MAPPING_H__ +#define __MLX5_MAPPING_H__ + +struct mapping_ctx; + +int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id); +int mapping_remove(struct mapping_ctx *ctx, u32 id); +int mapping_find(struct mapping_ctx *ctx, u32 id, void *data); + +/* mapping uses an xarray to map data to ids in add(), and for find(). + * For locking, it uses an internal xarray spin lock for add()/remove(), + * find() uses rcu_read_lock(). + * Choosing delayed_removal postpones the removal of a previously mapped + * id by MAPPING_GRACE_PERIOD milliseconds. + * This is to avoid races against hardware, where we mark the packet in + * hardware with a previous id, and quick remove() and add() reusing the same + * previous id. Then find() will get the new mapping instead of the old + * which was used to mark the packet. 
+ */ +struct mapping_ctx *mapping_create(size_t data_size, u32 max_id, + bool delayed_removal); +void mapping_destroy(struct mapping_ctx *ctx); + +#endif /* __MLX5_MAPPING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index fce6eccdcf8b..2c4a670c8ffd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -343,64 +343,76 @@ out: return err; } -static u32 fec_supported_speeds[] = { - 10000, - 40000, - 25000, - 50000, - 56000, - 100000 +enum mlx5e_fec_supported_link_mode { + MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G, + MLX5E_FEC_SUPPORTED_LINK_MODES_25G, + MLX5E_FEC_SUPPORTED_LINK_MODES_50G, + MLX5E_FEC_SUPPORTED_LINK_MODES_56G, + MLX5E_FEC_SUPPORTED_LINK_MODES_100G, + MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X, + MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X, + MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X, + MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X, + MLX5E_MAX_FEC_SUPPORTED_LINK_MODE, }; -#define MLX5E_FEC_SUPPORTED_SPEEDS ARRAY_SIZE(fec_supported_speeds) +#define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X + +#define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \ + do { \ + u16 *_policy = &(policy); \ + u32 *_buf = buf; \ + \ + if (write) \ + MLX5_SET(pplm_reg, _buf, fec_override_admin_##link, *_policy); \ + else \ + *_policy = MLX5_GET(pplm_reg, _buf, fec_override_admin_##link); \ + } while (0) + +#define MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(buf, policy, write, link) \ + do { \ + u16 *__policy = &(policy); \ + bool _write = (write); \ + \ + if (_write && *__policy) \ + *__policy = find_first_bit((u_long *)__policy, \ + sizeof(u16) * BITS_PER_BYTE);\ + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, *__policy, _write, link); \ + if (!_write && *__policy) \ + *__policy = 1 << *__policy; \ + } while (0) /* get/set FEC admin field for a given speed */ -static int mlx5e_fec_admin_field(u32 *pplm, - u8 *fec_policy, - bool write, 
- u32 speed) +static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, + enum mlx5e_fec_supported_link_mode link_mode) { - switch (speed) { - case 10000: - case 40000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_10g_40g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_10g_40g, *fec_policy); + switch (link_mode) { + case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 10g_40g); break; - case 25000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_25g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_25g, *fec_policy); + case MLX5E_FEC_SUPPORTED_LINK_MODES_25G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 25g); break; - case 50000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_50g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_50g, *fec_policy); + case MLX5E_FEC_SUPPORTED_LINK_MODES_50G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g); break; - case 56000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_56g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_56g, *fec_policy); + case MLX5E_FEC_SUPPORTED_LINK_MODES_56G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 56g); break; - case 100000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_100g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_100g, *fec_policy); + case MLX5E_FEC_SUPPORTED_LINK_MODES_100G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X: + MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 50g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X: + MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 100g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X: + MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, 
*fec_policy, write, 200g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: + MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 400g_8x); break; default: return -EINVAL; @@ -408,32 +420,40 @@ static int mlx5e_fec_admin_field(u32 *pplm, return 0; } +#define MLX5E_GET_FEC_OVERRIDE_CAP(buf, link) \ + MLX5_GET(pplm_reg, buf, fec_override_cap_##link) + /* returns FEC capabilities for a given speed */ -static int mlx5e_get_fec_cap_field(u32 *pplm, - u8 *fec_cap, - u32 speed) +static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap, + enum mlx5e_fec_supported_link_mode link_mode) { - switch (speed) { - case 10000: - case 40000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_10g_40g); + switch (link_mode) { + case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 10g_40g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_25G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 25g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_50G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_56G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 56g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_100G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g); break; - case 25000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_25g); + case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g_1x); break; - case 50000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_50g); + case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g_2x); break; - case 56000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_56g); + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_4x); break; - case 100000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_100g); + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 
400g_8x); break; default: return -EINVAL; @@ -441,13 +461,14 @@ static int mlx5e_get_fec_cap_field(u32 *pplm, return 0; } -int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps) +bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy) { + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(pplm_reg); - u32 current_fec_speed; int err; + int i; if (!MLX5_CAP_GEN(dev, pcam_reg)) return -EOPNOTSUPP; @@ -458,23 +479,30 @@ int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps) MLX5_SET(pplm_reg, in, local_port, 1); err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); if (err) - return err; + return false; - err = mlx5e_port_linkspeed(dev, &current_fec_speed); - if (err) - return err; + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + u16 fec_caps; - return mlx5e_get_fec_cap_field(out, fec_caps, current_fec_speed); + if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; + + mlx5e_get_fec_cap_field(out, &fec_caps, i); + if (fec_caps & fec_policy) + return true; + } + return false; } int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, - u8 *fec_configured_mode) + u16 *fec_configured_mode) { + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(pplm_reg); - u32 link_speed; int err; + int i; if (!MLX5_CAP_GEN(dev, pcam_reg)) return -EOPNOTSUPP; @@ -490,24 +518,28 @@ int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, *fec_mode_active = MLX5_GET(pplm_reg, out, fec_mode_active); if (!fec_configured_mode) - return 0; + goto out; - err = mlx5e_port_linkspeed(dev, &link_speed); - if (err) - return err; + *fec_configured_mode = 0; + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + if (i >= 
MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; - return mlx5e_fec_admin_field(out, fec_configured_mode, 0, link_speed); + mlx5e_fec_admin_field(out, fec_configured_mode, 0, i); + if (*fec_configured_mode != 0) + goto out; + } +out: + return 0; } -int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) { - u8 fec_policy_nofec = BIT(MLX5E_FEC_NOFEC); - bool fec_mode_not_supp_in_speed = false; + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(pplm_reg); - u8 fec_policy_auto = 0; - u8 fec_caps = 0; + u16 fec_policy_auto = 0; int err; int i; @@ -517,6 +549,9 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) if (!MLX5_CAP_PCAM_REG(dev, pplm)) return -EOPNOTSUPP; + if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane) + return -EOPNOTSUPP; + MLX5_SET(pplm_reg, in, local_port, 1); err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); if (err) @@ -524,25 +559,31 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) MLX5_SET(pplm_reg, out, local_port, 1); - for (i = 0; i < MLX5E_FEC_SUPPORTED_SPEEDS; i++) { - mlx5e_get_fec_cap_field(out, &fec_caps, fec_supported_speeds[i]); - /* policy supported for link speed, or policy is auto */ - if (fec_caps & fec_policy || fec_policy == fec_policy_auto) { - mlx5e_fec_admin_field(out, &fec_policy, 1, - fec_supported_speeds[i]); - } else { - /* turn off FEC if supported. 
Else, leave it the same */ - if (fec_caps & fec_policy_nofec) - mlx5e_fec_admin_field(out, &fec_policy_nofec, 1, - fec_supported_speeds[i]); - fec_mode_not_supp_in_speed = true; - } - } + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + u16 conf_fec = fec_policy; + u16 fec_caps = 0; + + if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; - if (fec_mode_not_supp_in_speed) - mlx5_core_dbg(dev, - "FEC policy 0x%x is not supported for some speeds", - fec_policy); + /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514 + * to link modes up to 25G per lane and to + * MLX5E_FEC_RS_544_514 in the new link modes based on + * 50 G per lane + */ + if (conf_fec == (1 << MLX5E_FEC_RS_528_514) && + i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE) + conf_fec = (1 << MLX5E_FEC_RS_544_514); + + mlx5e_get_fec_cap_field(out, &fec_caps, i); + + /* policy supported for link speed */ + if (fec_caps & conf_fec) + mlx5e_fec_admin_field(out, &conf_fec, 1, i); + else + /* set FEC to auto*/ + mlx5e_fec_admin_field(out, &fec_policy_auto, 1, i); + } return mlx5_core_access_reg(dev, out, sz, out, sz, MLX5_REG_PPLM, 0, 1); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index 4a7f4497692b..a2ddd446dd59 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -60,15 +60,17 @@ int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); -int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps); +bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy); int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, - u8 *fec_configured_mode); -int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy); + u16 *fec_configured_mode); +int 
mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy); enum { MLX5E_FEC_NOFEC, MLX5E_FEC_FIRECODE, MLX5E_FEC_RS_528_514, + MLX5E_FEC_RS_544_514 = 7, + MLX5E_FEC_LLRS_272_257_1 = 9, }; #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index a01e2de2488f..c209579fc213 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -102,19 +102,6 @@ out: return err; } -void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) -{ - struct mlx5e_priv *priv = icosq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx = {}; - - err_ctx.ctx = icosq; - err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover; - sprintf(err_str, "ERR CQE on ICOSQ: 0x%x", icosq->sqn); - - mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); -} - static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) { struct net_device *dev = rq->netdev; @@ -171,19 +158,6 @@ out: return err; } -void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq) -{ - struct mlx5e_priv *priv = rq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx = {}; - - err_ctx.ctx = rq; - err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover; - sprintf(err_str, "ERR CQE on RQ: 0x%x", rq->rqn); - - mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); -} - static int mlx5e_rx_reporter_timeout_recover(void *ctx) { struct mlx5e_icosq *icosq; @@ -201,21 +175,6 @@ static int mlx5e_rx_reporter_timeout_recover(void *ctx) return err; } -void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) -{ - struct mlx5e_icosq *icosq = &rq->channel->icosq; - struct mlx5e_priv *priv = rq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx = {}; - - err_ctx.ctx = rq; - err_ctx.recover = mlx5e_rx_reporter_timeout_recover; - sprintf(err_str, "RX 
timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n", - icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn); - - mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); -} - static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) { return err_ctx->recover(err_ctx->ctx); @@ -371,10 +330,235 @@ unlock: return err; } +static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_txqsq *icosq = ctx; + struct mlx5_rsc_key key = {}; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "ICOSQ"); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = icosq->sqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_SND_BUFF; + key.num_of_obj2 = MLX5_RSC_DUMP_ALL; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_reporter_named_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5_rsc_key key = {}; + struct mlx5e_rq *rq = ctx; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = 
mlx5e_reporter_named_obj_nest_start(fmsg, "RX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "RQ"); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = rq->rqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "receive_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_RCV_BUFF; + key.num_of_obj2 = MLX5_RSC_DUMP_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_reporter_named_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) +{ + struct mlx5_rsc_key key = {}; + int i, err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "RX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); + if (err) + return err; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_rq *rq = &priv->channels.c[i]->rq; + + err = mlx5e_health_queue_dump(priv, fmsg, rq->rqn, "RQ"); + if (err) + return err; + } + + return devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_from_ctx(struct 
mlx5e_priv *priv, + struct mlx5e_err_ctx *err_ctx, + struct devlink_fmsg *fmsg) +{ + return err_ctx->dump(priv, fmsg, err_ctx->ctx); +} + +static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_rx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : + mlx5e_rx_reporter_dump_all_rqs(priv, fmsg); +} + +void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) +{ + struct mlx5e_icosq *icosq = &rq->channel->icosq; + struct mlx5e_priv *priv = rq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = rq; + err_ctx.recover = mlx5e_rx_reporter_timeout_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_rq; + snprintf(err_str, sizeof(err_str), + "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x", + icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq) +{ + struct mlx5e_priv *priv = rq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = rq; + err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_rq; + snprintf(err_str, sizeof(err_str), "ERR CQE on RQ: 0x%x", rq->rqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) +{ + struct mlx5e_priv *priv = icosq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = icosq; + err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_icosq; + snprintf(err_str, sizeof(err_str), "ERR CQE on ICOSQ: 0x%x", icosq->sqn); + + 
mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { .name = "rx", .recover = mlx5e_rx_reporter_recover, .diagnose = mlx5e_rx_reporter_diagnose, + .dump = mlx5e_rx_reporter_dump, }; #define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 @@ -387,7 +571,7 @@ int mlx5e_reporter_rx_create(struct mlx5e_priv *priv) reporter = devlink_health_reporter_create(devlink, &mlx5_rx_reporter_ops, MLX5E_REPORTER_RX_GRACEFUL_PERIOD, - true, priv); + priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", PTR_ERR(reporter)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index b468549e96ff..9805fc085512 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -82,19 +82,6 @@ out: return err; } -void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) -{ - struct mlx5e_priv *priv = sq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx = {0}; - - err_ctx.ctx = sq; - err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; - sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn); - - mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); -} - static int mlx5e_tx_reporter_timeout_recover(void *ctx) { struct mlx5_eq_comp *eq; @@ -110,22 +97,6 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx) return err; } -int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) -{ - struct mlx5e_priv *priv = sq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx; - - err_ctx.ctx = sq; - err_ctx.recover = mlx5e_tx_reporter_timeout_recover; - sprintf(err_str, - "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", - sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, - 
jiffies_to_usecs(jiffies - sq->txq->trans_start)); - - return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); -} - /* state lock cannot be grabbed within this function. * It can cause a dead lock or a read-after-free. */ @@ -275,10 +246,162 @@ unlock: return err; } +static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5_rsc_key key = {}; + struct mlx5e_txqsq *sq = ctx; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ"); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = sq->sqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_SND_BUFF; + key.num_of_obj2 = MLX5_RSC_DUMP_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_reporter_named_obj_nest_end(fmsg); +} + +static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) +{ + struct mlx5_rsc_key key = {}; + int i, tc, err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = 
MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); + if (err) + return err; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + for (tc = 0; tc < priv->channels.params.num_tc; tc++) { + struct mlx5e_txqsq *sq = &c->sq[tc]; + + err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ"); + if (err) + return err; + } + } + return devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv, + struct mlx5e_err_ctx *err_ctx, + struct devlink_fmsg *fmsg) +{ + return err_ctx->dump(priv, fmsg, err_ctx->ctx); +} + +static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? 
mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : + mlx5e_tx_reporter_dump_all_sqs(priv, fmsg); +} + +void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) +{ + struct mlx5e_priv *priv = sq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = sq; + err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; + err_ctx.dump = mlx5e_tx_reporter_dump_sq; + snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn); + + mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); +} + +int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) +{ + struct mlx5e_priv *priv = sq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = sq; + err_ctx.recover = mlx5e_tx_reporter_timeout_recover; + err_ctx.dump = mlx5e_tx_reporter_dump_sq; + snprintf(err_str, sizeof(err_str), + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", + sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, + jiffies_to_usecs(jiffies - sq->txq->trans_start)); + + return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); +} + static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { .name = "tx", .recover = mlx5e_tx_reporter_recover, .diagnose = mlx5e_tx_reporter_diagnose, + .dump = mlx5e_tx_reporter_dump, }; #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 @@ -293,7 +416,7 @@ int mlx5e_reporter_tx_create(struct mlx5e_priv *priv) reporter = devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, MLX5_REPORTER_TX_GRACEFUL_PERIOD, - true, priv); + priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create tx reporter, err = %ld\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c new file mode 100644 index 000000000000..ad3e3a65d403 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ 
-0,0 +1,1369 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_zones.h> +#include <net/netfilter/nf_conntrack_labels.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_acct.h> +#include <uapi/linux/tc_act/tc_pedit.h> +#include <net/tc_act/tc_ct.h> +#include <net/flow_offload.h> +#include <net/netfilter/nf_flow_table.h> +#include <linux/workqueue.h> + +#include "esw/chains.h" +#include "en/tc_ct.h" +#include "en.h" +#include "en_tc.h" +#include "en_rep.h" + +#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8) +#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0) +#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) +#define MLX5_CT_STATE_TRK_BIT BIT(2) + +#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8) +#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) +#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX + +#define ct_dbg(fmt, args...)\ + netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args) + +struct mlx5_tc_ct_priv { + struct mlx5_eswitch *esw; + const struct net_device *netdev; + struct idr fte_ids; + struct idr tuple_ids; + struct rhashtable zone_ht; + struct mlx5_flow_table *ct; + struct mlx5_flow_table *ct_nat; + struct mlx5_flow_table *post_ct; + struct mutex control_lock; /* guards parallel adds/dels */ +}; + +struct mlx5_ct_flow { + struct mlx5_esw_flow_attr pre_ct_attr; + struct mlx5_esw_flow_attr post_ct_attr; + struct mlx5_flow_handle *pre_ct_rule; + struct mlx5_flow_handle *post_ct_rule; + struct mlx5_ct_ft *ft; + u32 fte_id; + u32 chain_mapping; +}; + +struct mlx5_ct_zone_rule { + struct mlx5_flow_handle *rule; + struct mlx5_esw_flow_attr attr; + int tupleid; + bool nat; +}; + +struct mlx5_ct_ft { + struct rhash_head node; + u16 zone; + refcount_t refcount; + struct 
nf_flowtable *nf_ft; + struct mlx5_tc_ct_priv *ct_priv; + struct rhashtable ct_entries_ht; + struct list_head ct_entries_list; +}; + +struct mlx5_ct_entry { + struct list_head list; + u16 zone; + struct rhash_head node; + struct flow_rule *flow_rule; + struct mlx5_fc *counter; + unsigned long lastuse; + unsigned long cookie; + unsigned long restore_cookie; + struct mlx5_ct_zone_rule zone_rules[2]; +}; + +static const struct rhashtable_params cts_ht_params = { + .head_offset = offsetof(struct mlx5_ct_entry, node), + .key_offset = offsetof(struct mlx5_ct_entry, cookie), + .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie), + .automatic_shrinking = true, + .min_size = 16 * 1024, +}; + +static const struct rhashtable_params zone_params = { + .head_offset = offsetof(struct mlx5_ct_ft, node), + .key_offset = offsetof(struct mlx5_ct_ft, zone), + .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone), + .automatic_shrinking = true, +}; + +static struct mlx5_tc_ct_priv * +mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + return uplink_priv->ct_priv; +} + +static int +mlx5_tc_ct_set_tuple_match(struct mlx5_flow_spec *spec, + struct flow_rule *rule) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + u16 addr_type = 0; + u8 ip_proto = 0; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, + ntohs(match.mask->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ntohs(match.key->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + 
match.mask->ip_proto); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + match.key->ip_proto); + + ip_proto = match.key->ip_proto; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.key->src, sizeof(match.key->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.key->dst, sizeof(match.key->dst)); + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, sizeof(match.key->src)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, sizeof(match.key->dst)); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + switch (ip_proto) { + case IPPROTO_TCP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_sport, ntohs(match.mask->src)); + 
MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_dport, ntohs(match.key->dst)); + break; + + case IPPROTO_UDP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_dport, ntohs(match.key->dst)); + break; + default: + break; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match; + + flow_rule_match_tcp(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, + ntohs(match.mask->flags)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, + ntohs(match.key->flags)); + } + + return 0; +} + +static void +mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry, + bool nat) +{ + struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; + struct mlx5_esw_flow_attr *attr = &zone_rule->attr; + struct mlx5_eswitch *esw = ct_priv->esw; + + ct_dbg("Deleting ct entry rule in zone %d", entry->zone); + + mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr); + mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr); + idr_remove(&ct_priv->tuple_ids, zone_rule->tupleid); +} + +static void +mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry) +{ + mlx5_tc_ct_entry_del_rule(ct_priv, entry, true); + mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); + + mlx5_fc_destroy(ct_priv->esw->dev, entry->counter); +} + +static struct flow_action_entry * +mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule) +{ + struct flow_action *flow_action = &flow_rule->action; + struct flow_action_entry *act; + int i; + + flow_action_for_each(i, 
act, flow_action) { + if (act->id == FLOW_ACTION_CT_METADATA) + return act; + } + + return NULL; +} + +static int +mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts, + u8 ct_state, + u32 mark, + u32 label, + u32 tupleid) +{ + struct mlx5_eswitch *esw = ct_priv->esw; + int err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + CTSTATE_TO_REG, ct_state); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + MARK_TO_REG, mark); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + LABELS_TO_REG, label); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + TUPLEID_TO_REG, tupleid); + if (err) + return err; + + return 0; +} + +static int +mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, + char *modact) +{ + u32 offset = act->mangle.offset, field; + + switch (act->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + MLX5_SET(set_action_in, modact, length, 0); + if (offset == offsetof(struct iphdr, saddr)) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV4; + else if (offset == offsetof(struct iphdr, daddr)) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV4; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + MLX5_SET(set_action_in, modact, length, 0); + if (offset == offsetof(struct ipv6hdr, saddr)) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0; + else if (offset == offsetof(struct ipv6hdr, saddr) + 4) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, saddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64; + else if (offset == offsetof(struct ipv6hdr, saddr) + 12) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96; + else if (offset == offsetof(struct ipv6hdr, daddr)) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0; + else if (offset == offsetof(struct ipv6hdr, daddr) + 4) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; + else if (offset == 
offsetof(struct ipv6hdr, daddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64; + else if (offset == offsetof(struct ipv6hdr, daddr) + 12) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + MLX5_SET(set_action_in, modact, length, 16); + if (offset == offsetof(struct tcphdr, source)) + field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT; + else if (offset == offsetof(struct tcphdr, dest)) + field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + MLX5_SET(set_action_in, modact, length, 16); + if (offset == offsetof(struct udphdr, source)) + field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT; + else if (offset == offsetof(struct udphdr, dest)) + field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT; + else + return -EOPNOTSUPP; + break; + + default: + return -EOPNOTSUPP; + } + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, offset, 0); + MLX5_SET(set_action_in, modact, field, field); + MLX5_SET(set_action_in, modact, data, act->mangle.val); + + return 0; +} + +static int +mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + struct flow_action *flow_action = &flow_rule->action; + struct mlx5_core_dev *mdev = ct_priv->esw->dev; + struct flow_action_entry *act; + size_t action_size; + char *modact; + int err, i; + + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_MANGLE: { + err = alloc_mod_hdr_actions(mdev, + MLX5_FLOW_NAMESPACE_FDB, + mod_acts); + if (err) + return err; + + modact = mod_acts->actions + + mod_acts->num_actions * action_size; + + err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact); + if (err) + return err; + + mod_acts->num_actions++; + } + break; + + case FLOW_ACTION_CT_METADATA: + /* 
Handled earlier */ + continue; + default: + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int +mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_esw_flow_attr *attr, + struct flow_rule *flow_rule, + u32 tupleid, + bool nat) +{ + struct mlx5e_tc_mod_hdr_acts mod_acts = {}; + struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_modify_hdr *mod_hdr; + struct flow_action_entry *meta; + int err; + + meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); + if (!meta) + return -EOPNOTSUPP; + + if (meta->ct_metadata.labels[1] || + meta->ct_metadata.labels[2] || + meta->ct_metadata.labels[3]) { + ct_dbg("Failed to offload ct entry due to unsupported label"); + return -EOPNOTSUPP; + } + + if (nat) { + err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, + &mod_acts); + if (err) + goto err_mapping; + } + + err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, + (MLX5_CT_STATE_ESTABLISHED_BIT | + MLX5_CT_STATE_TRK_BIT), + meta->ct_metadata.mark, + meta->ct_metadata.labels[0], + tupleid); + if (err) + goto err_mapping; + + mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, + mod_acts.num_actions, + mod_acts.actions); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + goto err_mapping; + } + attr->modify_hdr = mod_hdr; + + dealloc_mod_hdr_actions(&mod_acts); + return 0; + +err_mapping: + dealloc_mod_hdr_actions(&mod_acts); + return err; +} + +static int +mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, + bool nat) +{ + struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; + struct mlx5_esw_flow_attr *attr = &zone_rule->attr; + struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_flow_spec *spec = NULL; + u32 tupleid = 1; + int err; + + zone_rule->nat = nat; + + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Get tuple unique id */ + err = idr_alloc_u32(&ct_priv->tuple_ids, zone_rule, &tupleid, + 
TUPLE_ID_MAX, GFP_KERNEL); + if (err) { + netdev_warn(ct_priv->netdev, + "Failed to allocate tuple id, err: %d\n", err); + goto err_idr_alloc; + } + zone_rule->tupleid = tupleid; + + err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, + tupleid, nat); + if (err) { + ct_dbg("Failed to create ct entry mod hdr"); + goto err_mod_hdr; + } + + attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->dest_chain = 0; + attr->dest_ft = ct_priv->post_ct; + attr->fdb = nat ? ct_priv->ct_nat : ct_priv->ct; + attr->outer_match_level = MLX5_MATCH_L4; + attr->counter = entry->counter; + attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + + mlx5_tc_ct_set_tuple_match(spec, flow_rule); + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + entry->zone & MLX5_CT_ZONE_MASK, + MLX5_CT_ZONE_MASK); + + zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + if (IS_ERR(zone_rule->rule)) { + err = PTR_ERR(zone_rule->rule); + ct_dbg("Failed to add ct entry rule, nat: %d", nat); + goto err_rule; + } + + kfree(spec); + ct_dbg("Offloaded ct entry rule in zone %d", entry->zone); + + return 0; + +err_rule: + mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr); +err_mod_hdr: + idr_remove(&ct_priv->tuple_ids, zone_rule->tupleid); +err_idr_alloc: + kfree(spec); + return err; +} + +static int +mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry) +{ + struct mlx5_eswitch *esw = ct_priv->esw; + int err; + + entry->counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(entry->counter)) { + err = PTR_ERR(entry->counter); + ct_dbg("Failed to create counter for ct entry"); + return err; + } + + err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false); + if (err) + goto err_orig; + + err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true); + if (err) + goto err_nat; + + return 0; + +err_nat: + mlx5_tc_ct_entry_del_rule(ct_priv, 
entry, false); +err_orig: + mlx5_fc_destroy(esw->dev, entry->counter); + return err; +} + +static int +mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, + struct flow_cls_offload *flow) +{ + struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow); + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; + struct flow_action_entry *meta_action; + unsigned long cookie = flow->cookie; + struct mlx5_ct_entry *entry; + int err; + + meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule); + if (!meta_action) + return -EOPNOTSUPP; + + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, + cts_ht_params); + if (entry) + return 0; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->zone = ft->zone; + entry->flow_rule = flow_rule; + entry->cookie = flow->cookie; + entry->restore_cookie = meta_action->ct_metadata.cookie; + + err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry); + if (err) + goto err_rules; + + err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node, + cts_ht_params); + if (err) + goto err_insert; + + list_add(&entry->list, &ft->ct_entries_list); + + return 0; + +err_insert: + mlx5_tc_ct_entry_del_rules(ct_priv, entry); +err_rules: + kfree(entry); + netdev_warn(ct_priv->netdev, + "Failed to offload ct entry, err: %d\n", err); + return err; +} + +static int +mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, + struct flow_cls_offload *flow) +{ + unsigned long cookie = flow->cookie; + struct mlx5_ct_entry *entry; + + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, + cts_ht_params); + if (!entry) + return -ENOENT; + + mlx5_tc_ct_entry_del_rules(ft->ct_priv, entry); + WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht, + &entry->node, + cts_ht_params)); + list_del(&entry->list); + kfree(entry); + + return 0; +} + +static int +mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, + struct flow_cls_offload *f) +{ + unsigned long cookie = f->cookie; + struct mlx5_ct_entry 
*entry; + u64 lastuse, packets, bytes; + + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, + cts_ht_params); + if (!entry) + return -ENOENT; + + mlx5_fc_query_cached(entry->counter, &bytes, &packets, &lastuse); + flow_stats_update(&f->stats, bytes, packets, lastuse, + FLOW_ACTION_HW_STATS_DELAYED); + + return 0; +} + +static int +mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct flow_cls_offload *f = type_data; + struct mlx5_ct_ft *ft = cb_priv; + + if (type != TC_SETUP_CLSFLOWER) + return -EOPNOTSUPP; + + switch (f->command) { + case FLOW_CLS_REPLACE: + return mlx5_tc_ct_block_flow_offload_add(ft, f); + case FLOW_CLS_DESTROY: + return mlx5_tc_ct_block_flow_offload_del(ft, f); + case FLOW_CLS_STATS: + return mlx5_tc_ct_block_flow_offload_stats(ft, f); + default: + break; + }; + + return -EOPNOTSUPP; +} + +int +mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct netlink_ext_ack *extack) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + struct flow_dissector_key_ct *mask, *key; + bool trk, est, untrk, unest, new; + u32 ctstate = 0, ctstate_mask = 0; + u16 ct_state_on, ct_state_off; + u16 ct_state, ct_state_mask; + struct flow_match_ct match; + + if (!flow_rule_match_key(f->rule, FLOW_DISSECTOR_KEY_CT)) + return 0; + + if (!ct_priv) { + NL_SET_ERR_MSG_MOD(extack, + "offload of ct matching isn't available"); + return -EOPNOTSUPP; + } + + flow_rule_match_ct(f->rule, &match); + + key = match.key; + mask = match.mask; + + ct_state = key->ct_state; + ct_state_mask = mask->ct_state; + + if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | + TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | + TCA_FLOWER_KEY_CT_FLAGS_NEW)) { + NL_SET_ERR_MSG_MOD(extack, + "only ct_state trk, est and new are supported for offload"); + return -EOPNOTSUPP; + } + + if (mask->ct_labels[1] || mask->ct_labels[2] || mask->ct_labels[3]) { + 
NL_SET_ERR_MSG_MOD(extack, + "only lower 32bits of ct_labels are supported for offload"); + return -EOPNOTSUPP; + } + + ct_state_on = ct_state & ct_state_mask; + ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask; + trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; + est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; + untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; + + ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; + ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; + ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; + ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; + + if (new) { + NL_SET_ERR_MSG_MOD(extack, + "matching on ct_state +new isn't supported"); + return -EOPNOTSUPP; + } + + if (mask->ct_zone) + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + key->ct_zone, MLX5_CT_ZONE_MASK); + if (ctstate_mask) + mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, + ctstate, ctstate_mask); + if (mask->ct_mark) + mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG, + key->ct_mark, mask->ct_mark); + if (mask->ct_labels[0]) + mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, + key->ct_labels[0], + mask->ct_labels[0]); + + return 0; +} + +int +mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, + struct mlx5_esw_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + + if (!ct_priv) { + NL_SET_ERR_MSG_MOD(extack, + "offload of ct action isn't available"); + return -EOPNOTSUPP; + } + + attr->ct_attr.zone = act->ct.zone; + attr->ct_attr.ct_action = act->ct.action; + attr->ct_attr.nf_ft = act->ct.flow_table; + + return 0; +} + +static struct mlx5_ct_ft * +mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, + struct nf_flowtable *nf_ft) +{ + struct mlx5_ct_ft *ft; + int err; + + ft = 
rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params); + if (ft) { + refcount_inc(&ft->refcount); + return ft; + } + + ft = kzalloc(sizeof(*ft), GFP_KERNEL); + if (!ft) + return ERR_PTR(-ENOMEM); + + ft->zone = zone; + ft->nf_ft = nf_ft; + ft->ct_priv = ct_priv; + INIT_LIST_HEAD(&ft->ct_entries_list); + refcount_set(&ft->refcount, 1); + + err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); + if (err) + goto err_init; + + err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, + zone_params); + if (err) + goto err_insert; + + err = nf_flow_table_offload_add_cb(ft->nf_ft, + mlx5_tc_ct_block_flow_offload, ft); + if (err) + goto err_add_cb; + + return ft; + +err_add_cb: + rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); +err_insert: + rhashtable_destroy(&ft->ct_entries_ht); +err_init: + kfree(ft); + return ERR_PTR(err); +} + +static void +mlx5_tc_ct_flush_ft(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) +{ + struct mlx5_ct_entry *entry; + + list_for_each_entry(entry, &ft->ct_entries_list, list) + mlx5_tc_ct_entry_del_rules(ft->ct_priv, entry); +} + +static void +mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) +{ + if (!refcount_dec_and_test(&ft->refcount)) + return; + + nf_flow_table_offload_del_cb(ft->nf_ft, + mlx5_tc_ct_block_flow_offload, ft); + mlx5_tc_ct_flush_ft(ct_priv, ft); + rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); + rhashtable_destroy(&ft->ct_entries_ht); + kfree(ft); +} + +/* We translate the tc filter with CT action to the following HW model: + * + * +-------------------+ +--------------------+ +--------------+ + * + pre_ct (tc chain) +----->+ CT (nat or no nat) +--->+ post_ct +-----> + * + original match + | + tuple + zone match + | + fte_id match + | + * +-------------------+ | +--------------------+ | +--------------+ | + * v v v + * set chain miss mapping set mark original + * set fte_id set label filter + * set zone set established actions + * set 
tunnel_id do nat (if needed) + * do decap + */ +static int +__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *orig_spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_handle **flow_rule) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; + struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; + struct mlx5_flow_spec *post_ct_spec = NULL; + struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_esw_flow_attr *pre_ct_attr; + struct mlx5_modify_hdr *mod_hdr; + struct mlx5_flow_handle *rule; + struct mlx5_ct_flow *ct_flow; + int chain_mapping = 0, err; + struct mlx5_ct_ft *ft; + u32 fte_id = 1; + + post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL); + ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); + if (!post_ct_spec || !ct_flow) { + kfree(post_ct_spec); + kfree(ct_flow); + return -ENOMEM; + } + + /* Register for CT established events */ + ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone, + attr->ct_attr.nf_ft); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + ct_dbg("Failed to register to ft callback"); + goto err_ft; + } + ct_flow->ft = ft; + + err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id, + MLX5_FTE_ID_MAX, GFP_KERNEL); + if (err) { + netdev_warn(priv->netdev, + "Failed to allocate fte id, err: %d\n", err); + goto err_idr; + } + ct_flow->fte_id = fte_id; + + /* Base esw attributes of both rules on original rule attribute */ + pre_ct_attr = &ct_flow->pre_ct_attr; + memcpy(pre_ct_attr, attr, sizeof(*attr)); + memcpy(&ct_flow->post_ct_attr, attr, sizeof(*attr)); + + /* Modify the original rule's action to fwd and modify, leave decap */ + pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; + pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + /* Write chain miss tag for miss in ct table as we + * don't go though all prios of this chain as normal tc rules + * 
miss. + */ + err = mlx5_esw_chains_get_chain_mapping(esw, attr->chain, + &chain_mapping); + if (err) { + ct_dbg("Failed to get chain register mapping for chain"); + goto err_get_chain; + } + ct_flow->chain_mapping = chain_mapping; + + err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + CHAIN_TO_REG, chain_mapping); + if (err) { + ct_dbg("Failed to set chain register mapping"); + goto err_mapping; + } + + err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, ZONE_TO_REG, + attr->ct_attr.zone & + MLX5_CT_ZONE_MASK); + if (err) { + ct_dbg("Failed to set zone register mapping"); + goto err_mapping; + } + + err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + FTEID_TO_REG, fte_id); + if (err) { + ct_dbg("Failed to set fte_id register mapping"); + goto err_mapping; + } + + /* If original flow is decap, we do it before going into ct table + * so add a rewrite for the tunnel match_id. + */ + if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) && + attr->chain == 0) { + u32 tun_id = mlx5e_tc_get_flow_tun_id(flow); + + err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + TUNNEL_TO_REG, + tun_id); + if (err) { + ct_dbg("Failed to set tunnel register mapping"); + goto err_mapping; + } + } + + mod_hdr = mlx5_modify_header_alloc(esw->dev, + MLX5_FLOW_NAMESPACE_FDB, + pre_mod_acts.num_actions, + pre_mod_acts.actions); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + ct_dbg("Failed to create pre ct mod hdr"); + goto err_mapping; + } + pre_ct_attr->modify_hdr = mod_hdr; + + /* Post ct rule matches on fte_id and executes original rule's + * tc rule action + */ + mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG, + fte_id, MLX5_FTE_ID_MASK); + + /* Put post_ct rule on post_ct fdb */ + ct_flow->post_ct_attr.chain = 0; + ct_flow->post_ct_attr.prio = 0; + ct_flow->post_ct_attr.fdb = ct_priv->post_ct; + + ct_flow->post_ct_attr.inner_match_level = MLX5_MATCH_NONE; + ct_flow->post_ct_attr.outer_match_level = MLX5_MATCH_NONE; + 
ct_flow->post_ct_attr.action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); + rule = mlx5_eswitch_add_offloaded_rule(esw, post_ct_spec, + &ct_flow->post_ct_attr); + ct_flow->post_ct_rule = rule; + if (IS_ERR(ct_flow->post_ct_rule)) { + err = PTR_ERR(ct_flow->post_ct_rule); + ct_dbg("Failed to add post ct rule"); + goto err_insert_post_ct; + } + + /* Change original rule point to ct table */ + pre_ct_attr->dest_chain = 0; + pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct; + ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw, + orig_spec, + pre_ct_attr); + if (IS_ERR(ct_flow->pre_ct_rule)) { + err = PTR_ERR(ct_flow->pre_ct_rule); + ct_dbg("Failed to add pre ct rule"); + goto err_insert_orig; + } + + attr->ct_attr.ct_flow = ct_flow; + *flow_rule = ct_flow->post_ct_rule; + dealloc_mod_hdr_actions(&pre_mod_acts); + kfree(post_ct_spec); + + return 0; + +err_insert_orig: + mlx5_eswitch_del_offloaded_rule(ct_priv->esw, ct_flow->post_ct_rule, + &ct_flow->post_ct_attr); +err_insert_post_ct: + mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); +err_mapping: + dealloc_mod_hdr_actions(&pre_mod_acts); + mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping); +err_get_chain: + idr_remove(&ct_priv->fte_ids, fte_id); +err_idr: + mlx5_tc_ct_del_ft_cb(ct_priv, ft); +err_ft: + kfree(post_ct_spec); + kfree(ct_flow); + netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); + return err; +} + +static int +__mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *orig_spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_acts, + struct mlx5_flow_handle **flow_rule) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_esw_flow_attr *pre_ct_attr; + struct mlx5_modify_hdr *mod_hdr; + struct mlx5_flow_handle *rule; + struct mlx5_ct_flow *ct_flow; + int err; + + ct_flow = kzalloc(sizeof(*ct_flow), 
GFP_KERNEL);
+	if (!ct_flow)
+		return -ENOMEM;
+
+	/* Base esw attributes on original rule attribute */
+	pre_ct_attr = &ct_flow->pre_ct_attr;
+	memcpy(pre_ct_attr, attr, sizeof(*attr));
+
+	/* ct clear: write zero to the ct state, mark, label and tuple id
+	 * registers.
+	 */
+	err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
+	if (err) {
+		ct_dbg("Failed to set register for ct clear");
+		goto err_set_registers;
+	}
+
+	mod_hdr = mlx5_modify_header_alloc(esw->dev,
+					   MLX5_FLOW_NAMESPACE_FDB,
+					   mod_acts->num_actions,
+					   mod_acts->actions);
+	if (IS_ERR(mod_hdr)) {
+		err = PTR_ERR(mod_hdr);
+		ct_dbg("Failed to add create ct clear mod hdr");
+		goto err_set_registers;
+	}
+
+	/* The action list is released once the modify header object has
+	 * been allocated from it.
+	 */
+	dealloc_mod_hdr_actions(mod_acts);
+	pre_ct_attr->modify_hdr = mod_hdr;
+	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+	rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		ct_dbg("Failed to add ct clear rule");
+		goto err_insert;
+	}
+
+	/* Publish ct_flow only on success; ownership moves to attr */
+	attr->ct_attr.ct_flow = ct_flow;
+	ct_flow->pre_ct_rule = rule;
+	*flow_rule = rule;
+
+	return 0;
+
+err_insert:
+	mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
+err_set_registers:
+	/* ct_flow was never published through attr, so it must be freed
+	 * here or it is leaked.
+	 */
+	kfree(ct_flow);
+	netdev_warn(priv->netdev,
+		    "Failed to offload ct clear flow, err %d\n", err);
+	return err;
+}
+
+/* Offload a tc flow that carries a CT action.
+ *
+ * Dispatches, under ct_priv->control_lock, to the ct-clear path when
+ * TCA_CT_ACT_CLEAR is set in attr->ct_attr.ct_action and to the regular
+ * CT offload path otherwise.
+ *
+ * Returns the resulting flow handle, ERR_PTR(-EOPNOTSUPP) when CT
+ * offload was not initialized (no ct_priv), or ERR_PTR(err) with the
+ * error reported by the selected path.
+ */
+struct mlx5_flow_handle *
+mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
+			struct mlx5e_tc_flow *flow,
+			struct mlx5_flow_spec *spec,
+			struct mlx5_esw_flow_attr *attr,
+			struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+	bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
+	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
+	struct mlx5_flow_handle *rule;
+	int err;
+
+	if (!ct_priv)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	mutex_lock(&ct_priv->control_lock);
+	if (clear_action)
+		err = __mlx5_tc_ct_flow_offload_clear(priv, flow, spec, attr,
+						      mod_hdr_acts, &rule);
+	else
+		err = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr,
+						&rule);
+	mutex_unlock(&ct_priv->control_lock);
+	if (err)
+		return ERR_PTR(err);
+
+	return rule;
+}
+
+static void
+__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_flow *ct_flow) +{ + struct mlx5_esw_flow_attr *pre_ct_attr = &ct_flow->pre_ct_attr; + struct mlx5_eswitch *esw = ct_priv->esw; + + mlx5_eswitch_del_offloaded_rule(esw, ct_flow->pre_ct_rule, + pre_ct_attr); + mlx5_modify_header_dealloc(esw->dev, pre_ct_attr->modify_hdr); + + if (ct_flow->post_ct_rule) { + mlx5_eswitch_del_offloaded_rule(esw, ct_flow->post_ct_rule, + &ct_flow->post_ct_attr); + mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping); + idr_remove(&ct_priv->fte_ids, ct_flow->fte_id); + mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); + } + + kfree(ct_flow); +} + +void +mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; + + /* We are called on error to clean up stuff from parsing + * but we don't have anything for now + */ + if (!ct_flow) + return; + + mutex_lock(&ct_priv->control_lock); + __mlx5_tc_ct_delete_flow(ct_priv, ct_flow); + mutex_unlock(&ct_priv->control_lock); +} + +static int +mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw, + const char **err_msg) +{ +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + /* cannot restore chain ID on HW miss */ + + *err_msg = "tc skb extension missing"; + return -EOPNOTSUPP; +#endif + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) { + *err_msg = "firmware level support is missing"; + return -EOPNOTSUPP; + } + + if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) { + /* vlan workaround should be avoided for multi chain rules. 
+ * This is just a sanity check as pop vlan action should + * be supported by any FW that supports ignore_flow_level + */ + + *err_msg = "firmware vlan actions support is missing"; + return -EOPNOTSUPP; + } + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, + fdb_modify_header_fwd_to_table)) { + /* CT always writes to registers which are mod header actions. + * Therefore, mod header and goto is required + */ + + *err_msg = "firmware fwd and modify support is missing"; + return -EOPNOTSUPP; + } + + if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + *err_msg = "register loopback isn't supported"; + return -EOPNOTSUPP; + } + + return 0; +} + +static void +mlx5_tc_ct_init_err(struct mlx5e_rep_priv *rpriv, const char *msg, int err) +{ + if (msg) + netdev_warn(rpriv->netdev, + "tc ct offload not supported, %s, err: %d\n", + msg, err); + else + netdev_warn(rpriv->netdev, + "tc ct offload not supported, err: %d\n", + err); +} + +int +mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) +{ + struct mlx5_tc_ct_priv *ct_priv; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + const char *msg; + int err; + + rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + priv = netdev_priv(rpriv->netdev); + esw = priv->mdev->priv.eswitch; + + err = mlx5_tc_ct_init_check_support(esw, &msg); + if (err) { + mlx5_tc_ct_init_err(rpriv, msg, err); + goto err_support; + } + + ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); + if (!ct_priv) { + mlx5_tc_ct_init_err(rpriv, NULL, -ENOMEM); + goto err_alloc; + } + + ct_priv->esw = esw; + ct_priv->netdev = rpriv->netdev; + ct_priv->ct = mlx5_esw_chains_create_global_table(esw); + if (IS_ERR(ct_priv->ct)) { + err = PTR_ERR(ct_priv->ct); + mlx5_tc_ct_init_err(rpriv, "failed to create ct table", err); + goto err_ct_tbl; + } + + ct_priv->ct_nat = mlx5_esw_chains_create_global_table(esw); + if (IS_ERR(ct_priv->ct_nat)) { + err = PTR_ERR(ct_priv->ct_nat); + mlx5_tc_ct_init_err(rpriv, "failed to create 
ct nat table", + err); + goto err_ct_nat_tbl; + } + + ct_priv->post_ct = mlx5_esw_chains_create_global_table(esw); + if (IS_ERR(ct_priv->post_ct)) { + err = PTR_ERR(ct_priv->post_ct); + mlx5_tc_ct_init_err(rpriv, "failed to create post ct table", + err); + goto err_post_ct_tbl; + } + + idr_init(&ct_priv->fte_ids); + idr_init(&ct_priv->tuple_ids); + mutex_init(&ct_priv->control_lock); + rhashtable_init(&ct_priv->zone_ht, &zone_params); + + /* Done, set ct_priv to know it initializted */ + uplink_priv->ct_priv = ct_priv; + + return 0; + +err_post_ct_tbl: + mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct_nat); +err_ct_nat_tbl: + mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct); +err_ct_tbl: + kfree(ct_priv); +err_alloc: +err_support: + + return 0; +} + +void +mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) +{ + struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; + + if (!ct_priv) + return; + + mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->post_ct); + mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct_nat); + mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct); + + rhashtable_destroy(&ct_priv->zone_ht); + mutex_destroy(&ct_priv->control_lock); + idr_destroy(&ct_priv->tuple_ids); + idr_destroy(&ct_priv->fte_ids); + kfree(ct_priv); + + uplink_priv->ct_priv = NULL; +} + +bool +mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, + struct sk_buff *skb, u32 tupleid) +{ + struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; + struct mlx5_ct_zone_rule *zone_rule; + struct mlx5_ct_entry *entry; + + if (!ct_priv || !tupleid) + return true; + + zone_rule = idr_find(&ct_priv->tuple_ids, tupleid); + if (!zone_rule) + return false; + + entry = container_of(zone_rule, struct mlx5_ct_entry, + zone_rules[zone_rule->nat]); + tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); + + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h new file mode 100644 index 000000000000..091d305b633e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_CT_H__ +#define __MLX5_EN_TC_CT_H__ + +#include <net/pkt_cls.h> +#include <linux/mlx5/fs.h> +#include <net/tc_act/tc_ct.h> + +#include "en.h" + +struct mlx5_esw_flow_attr; +struct mlx5e_tc_mod_hdr_acts; +struct mlx5_rep_uplink_priv; +struct mlx5e_tc_flow; +struct mlx5e_priv; + +struct mlx5_ct_flow; + +struct nf_flowtable; + +struct mlx5_ct_attr { + u16 zone; + u16 ct_action; + struct mlx5_ct_flow *ct_flow; + struct nf_flowtable *nf_ft; +}; + +#define zone_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\ + .moffset = 0,\ + .mlen = 2,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_2) + 2,\ +} + +#define ctstate_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\ + .moffset = 2,\ + .mlen = 2,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_2),\ +} + +#define mark_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_3,\ + .moffset = 0,\ + .mlen = 4,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_3),\ +} + +#define labels_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_4,\ + .moffset = 0,\ + .mlen = 4,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_4),\ +} + +#define fteid_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5,\ + .moffset = 0,\ + .mlen = 4,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_5),\ +} + +#define tupleid_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\ + .moffset = 0,\ + .mlen = 3,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_1),\ +} + +#define 
TUPLE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[TUPLEID_TO_REG].mlen * 8) +#define TUPLE_ID_MAX GENMASK(TUPLE_ID_BITS - 1, 0) + +#if IS_ENABLED(CONFIG_MLX5_TC_CT) + +int +mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv); +void +mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv); + +int +mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct netlink_ext_ack *extack); +int +mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, + struct mlx5_esw_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack); + +struct mlx5_flow_handle * +mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void +mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *attr); + +bool +mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, + struct sk_buff *skb, u32 tupleid); + +#else /* CONFIG_MLX5_TC_CT */ + +static inline int +mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) +{ + return 0; +} + +static inline void +mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) +{ +} + +static inline int +mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct netlink_ext_ack *extack) +{ + if (!flow_rule_match_key(f->rule, FLOW_DISSECTOR_KEY_CT)) + return 0; + + NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); + netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n"); + return -EOPNOTSUPP; +} + +static inline int +mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, + struct mlx5_esw_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); + netdev_warn(priv->netdev, "mlx5 tc ct offload isn't 
enabled.\n"); + return -EOPNOTSUPP; +} + +static inline struct mlx5_flow_handle * +mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void +mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *attr) +{ +} + +static inline bool +mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, + struct sk_buff *skb, u32 tupleid) +{ + if (!tupleid) + return true; + + return false; +} + +#endif /* !IS_ENABLED(CONFIG_MLX5_TC_CT) */ +#endif /* __MLX5_EN_TC_CT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index af4ebd2951b5..b45c3f46570b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -66,6 +66,9 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) return -EOPNOTSUPP; + if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev) + return -EOPNOTSUPP; + return 0; } @@ -469,10 +472,15 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, - void *headers_c, - void *headers_v, u8 *match_level) + u8 *match_level) { struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + struct netlink_ext_ack *extack = f->common.extack; int err = 0; if (!tunnel) { @@ -499,6 +507,109 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, goto out; } + if (flow_rule_match_key(rule, 
FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_match_control match; + u16 addr_type; + + flow_rule_match_enc_control(rule, &match); + addr_type = match.key->addr_type; + + /* For tunnel addr_type used same key id`s as for non-tunnel */ + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(match.key->dst)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ETH_P_IP); + } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ETH_P_IPV6); + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; + + 
flow_rule_match_enc_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); + + if (match.mask->ttl && + !MLX5_CAP_ESW_FLOWTABLE_FDB + (priv->mdev, + ft_field_support.outer_ipv4_ttl)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on TTL is not supported"); + err = -EOPNOTSUPP; + goto out; + } + } + + /* Enforce DMAC when offloading incoming tunneled flows. + * Flow counters require a match on the DMAC. + */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), priv->netdev->dev_addr); + + /* let software handle IP fragments */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + + return 0; + out: return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 6f9a78c85ffd..1630f0ec3ad7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -76,8 +76,7 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, - void *headers_c, - void *headers_v, u8 *match_level); + u8 *match_level); int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 
cf58c9637904..29626c6c9c25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -433,7 +433,6 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) if (!ipsec) return; - drain_workqueue(ipsec->wq); destroy_workqueue(ipsec->wq); ida_destroy(&ipsec->halloc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 2c75b2752f58..014639ea06e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -175,28 +175,20 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, struct mlx5e_tir *tir = priv->indir_tir; struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); - struct mlx5_flow_spec *spec; enum mlx5e_traffic_types tt; int err = 0; - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) { - err = -ENOMEM; - goto out; - } - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; tt = arfs_get_tt(type); if (tt == -EINVAL) { netdev_err(priv->netdev, "%s: bad arfs_type: %d\n", __func__, type); - err = -EINVAL; - goto out; + return -EINVAL; } dest.tir_num = tir[tt].tirn; - arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec, + arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL, &flow_act, &dest, 1); if (IS_ERR(arfs_t->default_rule)) { @@ -205,8 +197,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n", __func__, type); } -out: - kvfree(spec); + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 01f2918063af..47874d34156b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1098,49 +1098,59 @@ void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) mlx5e_dcbnl_dscp_app(priv, DELETE); } -static void 
mlx5e_trust_update_tx_min_inline_mode(struct mlx5e_priv *priv, - struct mlx5e_params *params) +static void mlx5e_params_calc_trust_tx_min_inline_mode(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u8 trust_state) { - mlx5_query_min_inline(priv->mdev, &params->tx_min_inline_mode); - if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP && + mlx5_query_min_inline(mdev, &params->tx_min_inline_mode); + if (trust_state == MLX5_QPTS_TRUST_DSCP && params->tx_min_inline_mode == MLX5_INLINE_MODE_L2) params->tx_min_inline_mode = MLX5_INLINE_MODE_IP; } -static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv) +static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context) +{ + u8 *trust_state = context; + int err; + + err = mlx5_set_trust_state(priv->mdev, *trust_state); + if (err) + return err; + priv->dcbx_dp.trust_state = *trust_state; + + return 0; +} + +static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) { struct mlx5e_channels new_channels = {}; + bool reset_channels = true; + int err = 0; mutex_lock(&priv->state_lock); new_channels.params = priv->channels.params; - mlx5e_trust_update_tx_min_inline_mode(priv, &new_channels.params); + mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_channels.params, + trust_state); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; - goto out; + reset_channels = false; } /* Skip if tx_min_inline is the same */ if (new_channels.params.tx_min_inline_mode == priv->channels.params.tx_min_inline_mode) - goto out; + reset_channels = false; - mlx5e_safe_switch_channels(priv, &new_channels, NULL); + if (reset_channels) + err = mlx5e_safe_switch_channels(priv, &new_channels, + mlx5e_update_trust_state_hw, + &trust_state); + else + err = mlx5e_update_trust_state_hw(priv, &trust_state); -out: mutex_unlock(&priv->state_lock); -} - -static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) -{ - int err; - - err = 
mlx5_set_trust_state(priv->mdev, trust_state); - if (err) - return err; - priv->dcbx_dp.trust_state = trust_state; - mlx5e_trust_update_sq_inline_mode(priv); return err; } @@ -1171,7 +1181,8 @@ static int mlx5e_trust_initialize(struct mlx5e_priv *priv) if (err) return err; - mlx5e_trust_update_tx_min_inline_mode(priv, &priv->channels.params); + mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params, + priv->dcbx_dp.trust_state); err = mlx5_query_dscp2prio(priv->mdev, priv->dcbx_dp.dscp2prio); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d674cb679895..6d703ddee4e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -357,7 +357,7 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, goto unlock; } - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); unlock: mutex_unlock(&priv->state_lock); @@ -432,9 +432,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { *cur_params = new_channels.params; - if (!netif_is_rxfh_configured(priv->netdev)) - mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, count); + mlx5e_num_channels_changed(priv); goto out; } @@ -442,12 +440,9 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, if (arfs_enabled) mlx5e_arfs_disable(priv); - if (!netif_is_rxfh_configured(priv->netdev)) - mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, count); - /* Switch to new channels, set new parameters and close old ones */ - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, + mlx5e_num_channels_changed_ctx, NULL); if (arfs_enabled) { int err2 = mlx5e_arfs_enable(priv); @@ -580,7 +575,7 
@@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, goto out; } - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); out: mutex_unlock(&priv->state_lock); @@ -633,6 +628,8 @@ static const u32 pplm_fec_2_ethtool[] = { [MLX5E_FEC_NOFEC] = ETHTOOL_FEC_OFF, [MLX5E_FEC_FIRECODE] = ETHTOOL_FEC_BASER, [MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS, + [MLX5E_FEC_RS_544_514] = ETHTOOL_FEC_RS, + [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_FEC_LLRS, }; static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size) @@ -650,45 +647,48 @@ static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size) return 0; } -/* we use ETHTOOL_FEC_* offset and apply it to ETHTOOL_LINK_MODE_FEC_*_BIT */ -static u32 ethtool_fec2ethtool_caps(u_long ethtool_fec_code) -{ - u32 offset; - - offset = find_first_bit(&ethtool_fec_code, sizeof(u32)); - offset -= ETHTOOL_FEC_OFF_BIT; - offset += ETHTOOL_LINK_MODE_FEC_NONE_BIT; +#define MLX5E_ADVERTISE_SUPPORTED_FEC(mlx5_fec, ethtool_fec) \ + do { \ + if (mlx5e_fec_in_caps(dev, 1 << (mlx5_fec))) \ + __set_bit(ethtool_fec, \ + link_ksettings->link_modes.supported);\ + } while (0) - return offset; -} +static const u32 pplm_fec_2_ethtool_linkmodes[] = { + [MLX5E_FEC_NOFEC] = ETHTOOL_LINK_MODE_FEC_NONE_BIT, + [MLX5E_FEC_FIRECODE] = ETHTOOL_LINK_MODE_FEC_BASER_BIT, + [MLX5E_FEC_RS_528_514] = ETHTOOL_LINK_MODE_FEC_RS_BIT, + [MLX5E_FEC_RS_544_514] = ETHTOOL_LINK_MODE_FEC_RS_BIT, + [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_LINK_MODE_FEC_LLRS_BIT, +}; static int get_fec_supported_advertised(struct mlx5_core_dev *dev, struct ethtool_link_ksettings *link_ksettings) { - u_long fec_caps = 0; - u32 active_fec = 0; - u32 offset; + u_long active_fec = 0; u32 bitn; int err; - err = mlx5e_get_fec_caps(dev, (u8 *)&fec_caps); + err = mlx5e_get_fec_mode(dev, (u32 *)&active_fec, NULL); if (err) return (err == -EOPNOTSUPP) ? 
0 : err; - err = mlx5e_get_fec_mode(dev, &active_fec, NULL); - if (err) - return err; - - for_each_set_bit(bitn, &fec_caps, ARRAY_SIZE(pplm_fec_2_ethtool)) { - u_long ethtool_bitmask = pplm_fec_2_ethtool[bitn]; - - offset = ethtool_fec2ethtool_caps(ethtool_bitmask); - __set_bit(offset, link_ksettings->link_modes.supported); - } - - active_fec = pplm2ethtool_fec(active_fec, sizeof(u32) * BITS_PER_BYTE); - offset = ethtool_fec2ethtool_caps(active_fec); - __set_bit(offset, link_ksettings->link_modes.advertising); + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_NOFEC, + ETHTOOL_LINK_MODE_FEC_NONE_BIT); + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_FIRECODE, + ETHTOOL_LINK_MODE_FEC_BASER_BIT); + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_RS_528_514, + ETHTOOL_LINK_MODE_FEC_RS_BIT); + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_LLRS_272_257_1, + ETHTOOL_LINK_MODE_FEC_LLRS_BIT); + + /* active fec is a bit set, find out which bit is set and + * advertise the corresponding ethtool bit + */ + bitn = find_first_bit(&active_fec, sizeof(u32) * BITS_PER_BYTE); + if (bitn < ARRAY_SIZE(pplm_fec_2_ethtool_linkmodes)) + __set_bit(pplm_fec_2_ethtool_linkmodes[bitn], + link_ksettings->link_modes.advertising); return 0; } @@ -773,6 +773,7 @@ static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings static void get_speed_duplex(struct net_device *netdev, u32 eth_proto_oper, bool force_legacy, + u16 data_rate_oper, struct ethtool_link_ksettings *link_ksettings) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -784,7 +785,10 @@ static void get_speed_duplex(struct net_device *netdev, speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy); if (!speed) { - speed = SPEED_UNKNOWN; + if (data_rate_oper) + speed = 100 * data_rate_oper; + else + speed = SPEED_UNKNOWN; goto out; } @@ -873,17 +877,18 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, struct ethtool_link_ksettings *link_ksettings) { struct mlx5_core_dev *mdev = priv->mdev; - u32 
out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; - u32 rx_pause = 0; - u32 tx_pause = 0; - u32 eth_proto_cap; + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {}; u32 eth_proto_admin; - u32 eth_proto_lp; - u32 eth_proto_oper; u8 an_disable_admin; - u8 an_status; + u16 data_rate_oper; + u32 eth_proto_oper; + u32 eth_proto_cap; u8 connector_type; + u32 rx_pause = 0; + u32 tx_pause = 0; + u32 eth_proto_lp; bool admin_ext; + u8 an_status; bool ext; int err; @@ -917,6 +922,7 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); an_status = MLX5_GET(ptys_reg, out, an_status); connector_type = MLX5_GET(ptys_reg, out, connector_type); + data_rate_oper = MLX5_GET(ptys_reg, out, data_rate_oper); mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); @@ -927,7 +933,7 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings, admin_ext); get_speed_duplex(priv->netdev, eth_proto_oper, !admin_ext, - link_ksettings); + data_rate_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? 
eth_proto_oper : eth_proto_cap; @@ -1126,8 +1132,8 @@ static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) return mlx5e_ethtool_get_rxfh_indir_size(priv); } -static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, - u8 *hfunc) +int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rss_params *rss = &priv->rss_params; @@ -1146,8 +1152,8 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, return 0; } -static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, - const u8 *key, const u8 hfunc) +int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, + const u8 *key, const u8 hfunc) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rss_params *rss = &priv->rss_params; @@ -1511,7 +1517,7 @@ static int mlx5e_get_fecparam(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - u8 fec_configured = 0; + u16 fec_configured = 0; u32 fec_active = 0; int err; @@ -1527,7 +1533,7 @@ static int mlx5e_get_fecparam(struct net_device *netdev, return -EOPNOTSUPP; fecparam->fec = pplm2ethtool_fec((u_long)fec_configured, - sizeof(u8) * BITS_PER_BYTE); + sizeof(u16) * BITS_PER_BYTE); return 0; } @@ -1537,10 +1543,14 @@ static int mlx5e_set_fecparam(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - u8 fec_policy = 0; + u16 fec_policy = 0; int mode; int err; + if (bitmap_weight((unsigned long *)&fecparam->fec, + ETHTOOL_FEC_LLRS_BIT + 1) > 1) + return -EOPNOTSUPP; + for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) { if (!(pplm_fec_2_ethtool[mode] & fecparam->fec)) continue; @@ -1739,7 +1749,7 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, return 0; } - return mlx5e_safe_switch_channels(priv, &new_channels, NULL); + return mlx5e_safe_switch_channels(priv, 
&new_channels, NULL, NULL); } static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable) @@ -1772,7 +1782,7 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val return 0; } - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); if (err) return err; @@ -1829,7 +1839,7 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) return 0; } - return mlx5e_safe_switch_channels(priv, &new_channels, NULL); + return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); } static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) @@ -1873,7 +1883,7 @@ static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) return 0; } - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); return err; } @@ -1938,7 +1948,8 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) return priv->channels.params.pflags; } -static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) +int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, + u32 *rule_locs) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -1955,12 +1966,15 @@ static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u return mlx5e_ethtool_get_rxnfc(dev, info, rule_locs); } -static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { return mlx5e_ethtool_set_rxnfc(dev, cmd); } const struct ethtool_ops mlx5e_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_get_strings, diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4ef3dc79f73c..dd7f338425eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -63,6 +63,7 @@ #include "en/xsk/rx.h" #include "en/xsk/tx.h" #include "en/hv_vhca_stats.h" +#include "en/devlink.h" #include "lib/mlx5.h" @@ -1811,29 +1812,6 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } -static int mlx5e_alloc_xps_cpumask(struct mlx5e_channel *c, - struct mlx5e_params *params) -{ - int num_comp_vectors = mlx5_comp_vectors_count(c->mdev); - int irq; - - if (!zalloc_cpumask_var(&c->xps_cpumask, GFP_KERNEL)) - return -ENOMEM; - - for (irq = c->ix; irq < num_comp_vectors; irq += params->num_channels) { - int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(c->mdev, irq)); - - cpumask_set_cpu(cpu, c->xps_cpumask); - } - - return 0; -} - -static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c) -{ - free_cpumask_var(c->xps_cpumask); -} - static int mlx5e_open_queues(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) @@ -1984,10 +1962,6 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->irq_desc = irq_to_desc(irq); c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); - err = mlx5e_alloc_xps_cpumask(c, params); - if (err) - goto err_free_channel; - netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_queues(c, params, cparam); @@ -2010,9 +1984,7 @@ err_close_queues: err_napi_del: netif_napi_del(&c->napi); - mlx5e_free_xps_cpumask(c); -err_free_channel: kvfree(c); return err; @@ -2026,7 +1998,6 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_icosq(&c->icosq); mlx5e_activate_rq(&c->rq); - netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix); if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) 
mlx5e_activate_xsk(c); @@ -2051,7 +2022,6 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) mlx5e_close_xsk(c); mlx5e_close_queues(c); netif_napi_del(&c->napi); - mlx5e_free_xps_cpumask(c); kvfree(c); } @@ -2801,6 +2771,8 @@ free_in: return err; } +static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro); + static int mlx5e_set_mtu(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u16 mtu) { @@ -2850,6 +2822,8 @@ int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) return 0; } +MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_set_dev_port_mtu); + void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv) { struct mlx5e_params *params = &priv->channels.params; @@ -2886,6 +2860,54 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev) netdev_set_tc_queue(netdev, tc, nch, 0); } +static void mlx5e_update_netdev_queues(struct mlx5e_priv *priv, u16 count) +{ + int num_txqs = count * priv->channels.params.num_tc; + int num_rxqs = count * priv->profile->rq_groups; + struct net_device *netdev = priv->netdev; + + mlx5e_netdev_set_tcs(netdev); + netif_set_real_num_tx_queues(netdev, num_txqs); + netif_set_real_num_rx_queues(netdev, num_rxqs); +} + +static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, + struct mlx5e_params *params) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int num_comp_vectors, ix, irq; + + num_comp_vectors = mlx5_comp_vectors_count(mdev); + + for (ix = 0; ix < params->num_channels; ix++) { + cpumask_clear(priv->scratchpad.cpumask); + + for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(mdev, irq)); + + cpumask_set_cpu(cpu, priv->scratchpad.cpumask); + } + + netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix); + } +} + +int mlx5e_num_channels_changed(struct mlx5e_priv *priv) +{ + u16 count = priv->channels.params.num_channels; + + mlx5e_update_netdev_queues(priv, count); + mlx5e_set_default_xps_cpumasks(priv, 
&priv->channels.params); + + if (!netif_is_rxfh_configured(priv->netdev)) + mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); + + return 0; +} + +MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_num_channels_changed); + static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) { int i, ch; @@ -2907,14 +2929,6 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { - int num_txqs = priv->channels.num * priv->channels.params.num_tc; - int num_rxqs = priv->channels.num * priv->profile->rq_groups; - struct net_device *netdev = priv->netdev; - - mlx5e_netdev_set_tcs(netdev); - netif_set_real_num_tx_queues(netdev, num_txqs); - netif_set_real_num_rx_queues(netdev, num_rxqs); - mlx5e_build_txq_maps(priv); mlx5e_activate_channels(&priv->channels); mlx5e_xdp_tx_enable(priv); @@ -2947,42 +2961,52 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) mlx5e_deactivate_channels(&priv->channels); } -static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *new_chs, - mlx5e_fp_hw_modify hw_modify) +static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_preactivate preactivate, + void *context) { struct net_device *netdev = priv->netdev; - int new_num_txqs; + struct mlx5e_channels old_chs; int carrier_ok; - - new_num_txqs = new_chs->num * new_chs->params.num_tc; + int err = 0; carrier_ok = netif_carrier_ok(netdev); netif_carrier_off(netdev); - if (new_num_txqs < netdev->real_num_tx_queues) - netif_set_real_num_tx_queues(netdev, new_num_txqs); - mlx5e_deactivate_priv_channels(priv); - mlx5e_close_channels(&priv->channels); + old_chs = priv->channels; priv->channels = *new_chs; - /* New channels are ready to roll, modify HW settings if needed */ - if (hw_modify) - hw_modify(priv); + /* New channels are ready to roll, call the preactivate hook if needed + * to modify HW settings or update 
kernel parameters. + */ + if (preactivate) { + err = preactivate(priv, context); + if (err) { + priv->channels = old_chs; + goto out; + } + } + mlx5e_close_channels(&old_chs); priv->profile->update_rx(priv); + +out: mlx5e_activate_priv_channels(priv); /* return carrier back if needed */ if (carrier_ok) netif_carrier_on(netdev); + + return err; } int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, struct mlx5e_channels *new_chs, - mlx5e_fp_hw_modify hw_modify) + mlx5e_fp_preactivate preactivate, + void *context) { int err; @@ -2990,8 +3014,16 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, if (err) return err; - mlx5e_switch_priv_channels(priv, new_chs, hw_modify); + err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context); + if (err) + goto err_close; + return 0; + +err_close: + mlx5e_close_channels(new_chs); + + return err; } int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) @@ -2999,7 +3031,7 @@ int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) struct mlx5e_channels new_channels = {}; new_channels.params = priv->channels.params; - return mlx5e_safe_switch_channels(priv, &new_channels, NULL); + return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); } void mlx5e_timestamp_init(struct mlx5e_priv *priv) @@ -3448,7 +3480,8 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, goto out; } - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, + mlx5e_num_channels_changed_ctx, NULL); if (err) goto out; @@ -3661,7 +3694,8 @@ static int set_feature_lro(struct net_device *netdev, bool enable) goto out; } - err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_modify_tirs_lro); + err = mlx5e_safe_switch_channels(priv, &new_channels, + mlx5e_modify_tirs_lro_ctx, NULL); out: mutex_unlock(&priv->state_lock); return err; @@ -3880,7 +3914,7 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, } int mlx5e_change_mtu(struct net_device 
*netdev, int new_mtu, - change_hw_mtu_cb set_mtu_cb) + mlx5e_fp_preactivate preactivate) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_channels new_channels = {}; @@ -3929,13 +3963,13 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, if (!reset) { params->sw_mtu = new_mtu; - if (set_mtu_cb) - set_mtu_cb(priv); + if (preactivate) + preactivate(priv, NULL); netdev->mtu = params->sw_mtu; goto out; } - err = mlx5e_safe_switch_channels(priv, &new_channels, set_mtu_cb); + err = mlx5e_safe_switch_channels(priv, &new_channels, preactivate, NULL); if (err) goto out; @@ -3948,7 +3982,7 @@ out: static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu) { - return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu); + return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); } int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) @@ -4409,7 +4443,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) mlx5e_set_rq_type(priv->mdev, &new_channels.params); old_prog = priv->channels.params.xdp_prog; - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); if (err) goto unlock; } else { @@ -4589,6 +4623,7 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, #endif + .ndo_get_devlink_port = mlx5e_get_devlink_port, }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -4787,9 +4822,8 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, mlx5e_build_rq_params(mdev, params); /* HW LRO */ - - /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ - if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + if (MLX5_CAP_ETH(mdev, lro_cap) && + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { /* No XSK params: checking the availability of striding RQ in general. 
*/ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) params->lro_en = !slow_pci_heuristic(mdev); @@ -5230,6 +5264,9 @@ int mlx5e_netdev_init(struct net_device *netdev, priv->max_nch = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1); priv->max_opened_tc = 1; + if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL)) + return -ENOMEM; + mutex_init(&priv->state_lock); INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); @@ -5238,7 +5275,7 @@ int mlx5e_netdev_init(struct net_device *netdev, priv->wq = create_singlethread_workqueue("mlx5e"); if (!priv->wq) - return -ENOMEM; + goto err_free_cpumask; /* netdev init */ netif_carrier_off(netdev); @@ -5248,11 +5285,17 @@ int mlx5e_netdev_init(struct net_device *netdev, #endif return 0; + +err_free_cpumask: + free_cpumask_var(priv->scratchpad.cpumask); + + return -ENOMEM; } void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv) { destroy_workqueue(priv->wq); + free_cpumask_var(priv->scratchpad.cpumask); } struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, @@ -5287,6 +5330,7 @@ err_free_netdev: int mlx5e_attach_netdev(struct mlx5e_priv *priv) { + const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED; const struct mlx5e_profile *profile; int max_nch; int err; @@ -5298,10 +5342,25 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) max_nch = mlx5e_get_max_num_channels(priv->mdev); if (priv->channels.params.num_channels > max_nch) { mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch); + /* Reducing the number of channels - RXFH has to be reset, and + * mlx5e_num_channels_changed below will build the RQT. + */ + priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED; priv->channels.params.num_channels = max_nch; - mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, max_nch); } + /* 1. 
Set the real number of queues in the kernel the first time. + * 2. Set our default XPS cpumask. + * 3. Build the RQT. + * + * rtnl_lock is required by netif_set_real_num_*_queues in case the + * netdev has been registered by this point (if this function was called + * in the reload or resume flow). + */ + if (take_rtnl) + rtnl_lock(); + mlx5e_num_channels_changed(priv); + if (take_rtnl) + rtnl_unlock(); err = profile->init_tx(priv); if (err) @@ -5425,17 +5484,27 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) goto err_destroy_netdev; } + err = mlx5e_devlink_port_register(priv); + if (err) { + mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); + goto err_detach; + } + err = register_netdev(netdev); if (err) { mlx5_core_err(mdev, "register_netdev failed, %d\n", err); - goto err_detach; + goto err_devlink_port_unregister; } + mlx5e_devlink_port_type_eth_set(priv); + #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_init_app(priv); #endif return priv; +err_devlink_port_unregister: + mlx5e_devlink_port_unregister(priv); err_detach: mlx5e_detach(mdev, priv); err_destroy_netdev: @@ -5457,6 +5526,7 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_delete_app(priv); #endif + mlx5e_devlink_port_unregister(priv); unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); mlx5e_destroy_netdev(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 6ed307d7f191..2a0243e4af75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -41,7 +41,7 @@ #include <net/ipv6_stubs.h> #include "eswitch.h" -#include "eswitch_offloads_chains.h" +#include "esw/chains.h" #include "en.h" #include "en_rep.h" #include "en_tc.h" @@ -192,7 +192,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) err = mlx5_eswitch_get_vport_stats(esw, rep->vport, &vf_stats); if (err) { - 
pr_warn("vport %d error %d reading stats\n", rep->vport, err); + netdev_warn(priv->netdev, "vport %d error %d reading stats\n", + rep->vport, err); return; } @@ -252,25 +253,6 @@ static int mlx5e_rep_set_ringparam(struct net_device *dev, return mlx5e_ethtool_set_ringparam(priv, param); } -static int mlx5e_replace_rep_vport_rx_rule(struct mlx5e_priv *priv, - struct mlx5_flow_destination *dest) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; - struct mlx5_flow_handle *flow_rule; - - flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, - rep->vport, - dest); - if (IS_ERR(flow_rule)) - return PTR_ERR(flow_rule); - - mlx5_del_flow_rules(rpriv->vport_rx_rule); - rpriv->vport_rx_rule = flow_rule; - return 0; -} - static void mlx5e_rep_get_channels(struct net_device *dev, struct ethtool_channels *ch) { @@ -283,33 +265,8 @@ static int mlx5e_rep_set_channels(struct net_device *dev, struct ethtool_channels *ch) { struct mlx5e_priv *priv = netdev_priv(dev); - u16 curr_channels_amount = priv->channels.params.num_channels; - u32 new_channels_amount = ch->combined_count; - struct mlx5_flow_destination new_dest; - int err = 0; - - err = mlx5e_ethtool_set_channels(priv, ch); - if (err) - return err; - if (curr_channels_amount == 1 && new_channels_amount > 1) { - new_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - new_dest.ft = priv->fs.ttc.ft.t; - } else if (new_channels_amount == 1 && curr_channels_amount > 1) { - new_dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - new_dest.tir_num = priv->direct_tir[0].tirn; - } else { - return 0; - } - - err = mlx5e_replace_rep_vport_rx_rule(priv, &new_dest); - if (err) { - netdev_warn(priv->netdev, "Failed to update vport rx rule, when going from (%d) channels to (%d) channels\n", - curr_channels_amount, new_channels_amount); - return err; - } - - return 0; + return mlx5e_ethtool_set_channels(priv, ch); } static int 
mlx5e_rep_get_coalesce(struct net_device *netdev, @@ -375,6 +332,9 @@ static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev, } static const struct ethtool_ops mlx5e_rep_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = mlx5e_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, @@ -391,6 +351,9 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { }; static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = mlx5e_uplink_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, @@ -406,6 +369,10 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { .set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings, .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, + .get_rxfh = mlx5e_get_rxfh, + .set_rxfh = mlx5e_set_rxfh, + .get_rxnfc = mlx5e_get_rxnfc, + .set_rxnfc = mlx5e_set_rxnfc, .get_pauseparam = mlx5e_uplink_rep_get_pauseparam, .set_pauseparam = mlx5e_uplink_rep_set_pauseparam, }; @@ -727,9 +694,9 @@ static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) static int mlx5e_rep_indr_offload(struct net_device *netdev, struct flow_cls_offload *flower, - struct mlx5e_rep_indr_block_priv *indr_priv) + struct mlx5e_rep_indr_block_priv *indr_priv, + unsigned long flags) { - unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); int err = 0; @@ -750,20 +717,68 @@ mlx5e_rep_indr_offload(struct net_device *netdev, return err; } -static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type, - void *type_data, void *indr_priv) +static int mlx5e_rep_indr_setup_tc_cb(enum 
tc_setup_type type, + void *type_data, void *indr_priv) { + unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); struct mlx5e_rep_indr_block_priv *priv = indr_priv; switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_rep_indr_offload(priv->netdev, type_data, priv); + return mlx5e_rep_indr_offload(priv->netdev, type_data, priv, + flags); default: return -EOPNOTSUPP; } } -static void mlx5e_rep_indr_tc_block_unbind(void *cb_priv) +static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) +{ + struct mlx5e_rep_indr_block_priv *priv = indr_priv; + struct flow_cls_offload *f = type_data; + struct flow_cls_offload tmp; + struct mlx5e_priv *mpriv; + struct mlx5_eswitch *esw; + unsigned long flags; + int err; + + mpriv = netdev_priv(priv->rpriv->netdev); + esw = mpriv->mdev->priv.eswitch; + + flags = MLX5_TC_FLAG(EGRESS) | + MLX5_TC_FLAG(ESW_OFFLOAD) | + MLX5_TC_FLAG(FT_OFFLOAD); + + switch (type) { + case TC_SETUP_CLSFLOWER: + memcpy(&tmp, f, sizeof(*f)); + + /* Re-use tc offload path by moving the ft flow to the + * reserved ft chain. + * + * FT offload can use prio range [0, INT_MAX], so we normalize + * it to range [1, mlx5_esw_chains_get_prio_range(esw)] + * as with tc, where prio 0 isn't supported. + * + * We only support chain 0 of FT offload. 
+ */ + if (!mlx5_esw_chains_prios_supported(esw) || + tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw) || + tmp.common.chain_index) + return -EOPNOTSUPP; + + tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw); + tmp.common.prio++; + err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags); + memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); + return err; + default: + return -EOPNOTSUPP; + } +} + +static void mlx5e_rep_indr_block_unbind(void *cb_priv) { struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv; @@ -774,9 +789,10 @@ static void mlx5e_rep_indr_tc_block_unbind(void *cb_priv) static LIST_HEAD(mlx5e_block_cb_list); static int -mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, - struct mlx5e_rep_priv *rpriv, - struct flow_block_offload *f) +mlx5e_rep_indr_setup_block(struct net_device *netdev, + struct mlx5e_rep_priv *rpriv, + struct flow_block_offload *f, + flow_setup_cb_t *setup_cb) { struct mlx5e_rep_indr_block_priv *indr_priv; struct flow_block_cb *block_cb; @@ -802,9 +818,8 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, list_add(&indr_priv->list, &rpriv->uplink_priv.tc_indr_block_priv_list); - block_cb = flow_block_cb_alloc(mlx5e_rep_indr_setup_block_cb, - indr_priv, indr_priv, - mlx5e_rep_indr_tc_block_unbind); + block_cb = flow_block_cb_alloc(setup_cb, indr_priv, indr_priv, + mlx5e_rep_indr_block_unbind); if (IS_ERR(block_cb)) { list_del(&indr_priv->list); kfree(indr_priv); @@ -819,9 +834,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, if (!indr_priv) return -ENOENT; - block_cb = flow_block_cb_lookup(f->block, - mlx5e_rep_indr_setup_block_cb, - indr_priv); + block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv); if (!block_cb) return -ENOENT; @@ -835,13 +848,16 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, } static -int mlx5e_rep_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, - enum tc_setup_type type, void *type_data) +int mlx5e_rep_indr_setup_cb(struct net_device 
*netdev, void *cb_priv, + enum tc_setup_type type, void *type_data) { switch (type) { case TC_SETUP_BLOCK: - return mlx5e_rep_indr_setup_tc_block(netdev, cb_priv, - type_data); + return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, + mlx5e_rep_indr_setup_tc_cb); + case TC_SETUP_FT: + return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, + mlx5e_rep_indr_setup_ft_cb); default: return -EOPNOTSUPP; } @@ -853,7 +869,7 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, int err; err = __flow_indr_block_cb_register(netdev, rpriv, - mlx5e_rep_indr_setup_tc_cb, + mlx5e_rep_indr_setup_cb, rpriv); if (err) { struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); @@ -867,7 +883,7 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, struct net_device *netdev) { - __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb, + __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_cb, rpriv); } @@ -1279,8 +1295,7 @@ static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, case TC_SETUP_CLSFLOWER: memcpy(&tmp, f, sizeof(*f)); - if (!mlx5_esw_chains_prios_supported(esw) || - tmp.common.chain_index) + if (!mlx5_esw_chains_prios_supported(esw)) return -EOPNOTSUPP; /* Re-use tc offload path by moving the ft flow to the @@ -1396,7 +1411,7 @@ static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu) static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu) { - return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu); + return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); } static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr) @@ -1422,7 +1437,7 @@ static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan return 0; } -static struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) +static struct devlink_port 
*mlx5e_rep_get_devlink_port(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1435,7 +1450,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_stop = mlx5e_rep_close, .ndo_start_xmit = mlx5e_xmit, .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_devlink_port = mlx5e_get_devlink_port, + .ndo_get_devlink_port = mlx5e_rep_get_devlink_port, .ndo_get_stats64 = mlx5e_rep_get_stats, .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, @@ -1448,7 +1463,7 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { .ndo_start_xmit = mlx5e_xmit, .ndo_set_mac_address = mlx5e_uplink_rep_set_mac, .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_devlink_port = mlx5e_get_devlink_port, + .ndo_get_devlink_port = mlx5e_rep_get_devlink_port, .ndo_get_stats64 = mlx5e_get_stats, .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, @@ -1464,6 +1479,11 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { .ndo_set_features = mlx5e_set_features, }; +bool mlx5e_eswitch_uplink_rep(struct net_device *netdev) +{ + return netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep; +} + bool mlx5e_eswitch_rep(struct net_device *netdev) { if (netdev->netdev_ops == &mlx5e_netdev_ops_rep || @@ -1584,6 +1604,8 @@ static void mlx5e_cleanup_rep(struct mlx5e_priv *priv) static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct ttc_params ttc_params = {}; int tt, err; @@ -1593,6 +1615,11 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) /* The inner_ttc in the ttc params is intentionally not set */ ttc_params.any_tt_tirn = priv->direct_tir[0].tirn; mlx5e_set_ttc_ft_params(&ttc_params); + + if (rep->vport != MLX5_VPORT_UPLINK) + /* To give uplink rep TTC a lower level for chaining
from root ft */ + ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1; + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn; @@ -1604,6 +1631,52 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) return 0; } +static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + int err = 0; + + if (rep->vport != MLX5_VPORT_UPLINK) { + /* non uplink reps will skip any bypass tables and go directly to + * their own ttc + */ + rpriv->root_ft = priv->fs.ttc.ft.t; + return 0; + } + + /* uplink root ft will be used to auto chain, to ethtool or ttc tables */ + ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + netdev_err(priv->netdev, "Failed to get reps offloads namespace\n"); + return -EOPNOTSUPP; + } + + ft_attr.max_fte = 0; /* Empty table, miss rule will always point to next table */ + ft_attr.prio = 1; + ft_attr.level = 1; + + rpriv->root_ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(rpriv->root_ft)) { + err = PTR_ERR(rpriv->root_ft); + rpriv->root_ft = NULL; + } + + return err; +} + +static void mlx5e_destroy_rep_root_ft(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + if (rep->vport != MLX5_VPORT_UPLINK) + return; + mlx5_destroy_flow_table(rpriv->root_ft); +} + static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -1612,11 +1685,10 @@ static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv) struct mlx5_flow_handle *flow_rule; struct mlx5_flow_destination dest; - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dest.tir_num = priv->direct_tir[0].tirn; - flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, - rep->vport, - &dest); + dest.type =
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = rpriv->root_ft; + + flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, rep->vport, &dest); if (IS_ERR(flow_rule)) return PTR_ERR(flow_rule); rpriv->vport_rx_rule = flow_rule; @@ -1656,12 +1728,20 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_tirs; - err = mlx5e_create_rep_vport_rx_rule(priv); + err = mlx5e_create_rep_root_ft(priv); if (err) goto err_destroy_ttc_table; + err = mlx5e_create_rep_vport_rx_rule(priv); + if (err) + goto err_destroy_root_ft; + + mlx5e_ethtool_init_steering(priv); + return 0; +err_destroy_root_ft: + mlx5e_destroy_rep_root_ft(priv); err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_direct_tirs: @@ -1682,6 +1762,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) struct mlx5e_rep_priv *rpriv = priv->ppriv; mlx5_del_flow_rules(rpriv->vport_rx_rule); + mlx5e_destroy_rep_root_ft(priv); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, false); @@ -1920,7 +2001,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .update_rx = mlx5e_update_rep_rx, .update_stats = mlx5e_update_ndo_stats, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, - .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep, .max_tc = 1, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_rep_stats_grps, @@ -1940,7 +2021,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .update_stats = mlx5e_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, - .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep, .max_tc = MLX5E_MAX_NUM_TC, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_ul_rep_stats_grps, @@ -2026,8 
+2107,9 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) &mlx5e_uplink_rep_profile : &mlx5e_rep_profile; netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); if (!netdev) { - pr_warn("Failed to create representor netdev for vport %d\n", - rep->vport); + mlx5_core_warn(dev, + "Failed to create representor netdev for vport %d\n", + rep->vport); kfree(rpriv); return -EINVAL; } @@ -2045,29 +2127,32 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) err = mlx5e_attach_netdev(netdev_priv(netdev)); if (err) { - pr_warn("Failed to attach representor netdev for vport %d\n", - rep->vport); + netdev_warn(netdev, + "Failed to attach representor netdev for vport %d\n", + rep->vport); goto err_destroy_mdev_resources; } err = mlx5e_rep_neigh_init(rpriv); if (err) { - pr_warn("Failed to initialized neighbours handling for vport %d\n", - rep->vport); + netdev_warn(netdev, + "Failed to initialized neighbours handling for vport %d\n", + rep->vport); goto err_detach_netdev; } err = register_devlink_port(dev, rpriv); if (err) { - esw_warn(dev, "Failed to register devlink port %d\n", - rep->vport); + netdev_warn(netdev, "Failed to register devlink port %d\n", + rep->vport); goto err_neigh_cleanup; } err = register_netdev(netdev); if (err) { - pr_warn("Failed to register representor netdev for vport %d\n", - rep->vport); + netdev_warn(netdev, + "Failed to register representor netdev for vport %d\n", + rep->vport); goto err_devlink_cleanup; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 3f756d51435f..6a2337900420 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -55,6 +55,7 @@ struct mlx5e_neigh_update_table { unsigned long min_interval; /* jiffies */ }; +struct mlx5_tc_ct_priv; struct mlx5_rep_uplink_priv { /* Filters DB - instantiated by the uplink representor and shared by * the 
uplink's VFs @@ -81,12 +82,20 @@ struct mlx5_rep_uplink_priv { struct mutex unready_flows_lock; struct list_head unready_flows; struct work_struct reoffload_flows_work; + + /* maps tun_info to a unique id*/ + struct mapping_ctx *tunnel_mapping; + /* maps tun_enc_opts to a unique id*/ + struct mapping_ctx *tunnel_enc_opts_mapping; + + struct mlx5_tc_ct_priv *ct_priv; }; struct mlx5e_rep_priv { struct mlx5_eswitch_rep *rep; struct mlx5e_neigh_update_table neigh_update; struct net_device *netdev; + struct mlx5_flow_table *root_ft; struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ @@ -191,6 +200,8 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe); int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); @@ -200,6 +211,7 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); bool mlx5e_eswitch_rep(struct net_device *netdev); +bool mlx5e_eswitch_uplink_rep(struct net_device *netdev); #else /* CONFIG_MLX5_ESWITCH */ static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 312d4692425b..6173faf542b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -158,7 +158,8 @@ static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq, mlx5e_read_mini_arr_slot(wq, cqd, cqcc); mlx5e_decompress_cqe_no_hash(rq, wq, cqcc); - rq->handle_rx_cqe(rq, &cqd->title); + INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe, rq, &cqd->title); } 
mlx5e_cqes_update_owner(wq, cqcc - wq->cc); wq->cc = cqcc; @@ -178,7 +179,8 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, mlx5e_read_title_slot(rq, wq, cc); mlx5e_read_mini_arr_slot(wq, cqd, cc + 1); mlx5e_decompress_cqe(rq, wq, cc); - rq->handle_rx_cqe(rq, &cqd->title); + INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe, rq, &cqd->title); cqd->mini_arr_idx++; return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1; @@ -1192,6 +1194,7 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5e_tc_update_priv tc_priv = {}; struct mlx5_wq_cyc *wq = &rq->wqe.wq; struct mlx5e_wqe_frag_info *wi; struct sk_buff *skb; @@ -1224,13 +1227,78 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (rep->vlan && skb_vlan_tag_present(skb)) skb_vlan_pop(skb); + if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv)) + goto free_wqe; + napi_gro_receive(rq->cq.napi, skb); + mlx5_tc_rep_post_napi_receive(&tc_priv); + free_wqe: mlx5e_free_rx_wqe(rq, wi, true); wq_cyc_pop: mlx5_wq_cyc_pop(wq); } + +void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe) +{ + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; + u32 head_offset = wqe_offset & (PAGE_SIZE - 1); + u32 page_idx = wqe_offset >> PAGE_SHIFT; + struct mlx5e_tc_update_priv tc_priv = {}; + struct mlx5e_rx_wqe_ll *wqe; + struct mlx5_wq_ll *wq; + struct sk_buff *skb; + u16 cqe_bcnt; + + wi->consumed_strides += cstrides; + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + trigger_report(rq, cqe); + rq->stats->wqe_err++; + goto mpwrq_cqe_out; + } + + if 
(unlikely(mpwrq_is_filler_cqe(cqe))) { + struct mlx5e_rq_stats *stats = rq->stats; + + stats->mpwqe_filler_cqes++; + stats->mpwqe_filler_strides += cstrides; + goto mpwrq_cqe_out; + } + + cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + + skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq, + mlx5e_skb_from_cqe_mpwrq_linear, + mlx5e_skb_from_cqe_mpwrq_nonlinear, + rq, wi, cqe_bcnt, head_offset, page_idx); + if (!skb) + goto mpwrq_cqe_out; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv)) + goto mpwrq_cqe_out; + + napi_gro_receive(rq->cq.napi, skb); + + mlx5_tc_rep_post_napi_receive(&tc_priv); + +mpwrq_cqe_out: + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) + return; + + wq = &rq->mpwqe.wq; + wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); + mlx5e_free_rx_mpwqe(rq, wi, true); + mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); +} #endif struct sk_buff * diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index ec5fc52bf572..438128dde187 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -51,14 +51,18 @@ #include "en_rep.h" #include "en_tc.h" #include "eswitch.h" -#include "eswitch_offloads_chains.h" +#include "esw/chains.h" #include "fs_core.h" #include "en/port.h" #include "en/tc_tun.h" +#include "en/mapping.h" +#include "en/tc_ct.h" #include "lib/devcom.h" #include "lib/geneve.h" #include "diag/en_tc_tracepoint.h" +#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) + struct mlx5_nic_flow_attr { u32 action; u32 flow_tag; @@ -84,6 +88,7 @@ enum { MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, + MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7, }; #define MLX5E_TC_MAX_SPLITS 1 @@ -134,6 +139,8 @@ struct mlx5e_tc_flow { refcount_t refcnt; struct rcu_head 
rcu_head; struct completion init_done; + int tunnel_id; /* the mapped tunnel id of this flow */ + union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@ -144,15 +151,118 @@ struct mlx5e_tc_flow_parse_attr { const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; - int num_mod_hdr_actions; - int max_mod_hdr_actions; - void *mod_hdr_actions; + struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; }; #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16) +struct tunnel_match_key { + struct flow_dissector_key_control enc_control; + struct flow_dissector_key_keyid enc_key_id; + struct flow_dissector_key_ports enc_tp; + struct flow_dissector_key_ip enc_ip; + union { + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; + }; + + int filter_ifindex; +}; + +/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS. + * Upper TUNNEL_INFO_BITS for general tunnel info. + * Lower ENC_OPTS_BITS bits for enc_opts. 
+ */ +#define TUNNEL_INFO_BITS 6 +#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0) +#define ENC_OPTS_BITS 2 +#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0) +#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS) +#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0) + +struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { + [CHAIN_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, + .moffset = 0, + .mlen = 2, + }, + [TUNNEL_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1, + .moffset = 3, + .mlen = 1, + .soffset = MLX5_BYTE_OFF(fte_match_param, + misc_parameters_2.metadata_reg_c_1), + }, + [ZONE_TO_REG] = zone_to_reg_ct, + [CTSTATE_TO_REG] = ctstate_to_reg_ct, + [MARK_TO_REG] = mark_to_reg_ct, + [LABELS_TO_REG] = labels_to_reg_ct, + [FTEID_TO_REG] = fteid_to_reg_ct, + [TUPLEID_TO_REG] = tupleid_to_reg_ct, +}; + +static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); + +void +mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 data, + u32 mask) +{ + int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; + int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; + void *headers_c = spec->match_criteria; + void *headers_v = spec->match_value; + void *fmask, *fval; + + fmask = headers_c + soffset; + fval = headers_v + soffset; + + mask = cpu_to_be32(mask) >> (32 - (match_len * 8)); + data = cpu_to_be32(data) >> (32 - (match_len * 8)); + + memcpy(fmask, &mask, match_len); + memcpy(fval, &data, match_len); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; +} + +int +mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + u32 data) +{ + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; + int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; + int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; + char *modact; + int err; + + err = 
alloc_mod_hdr_actions(mdev, MLX5_FLOW_NAMESPACE_FDB, + mod_hdr_acts); + if (err) + return err; + + modact = mod_hdr_acts->actions + + (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ); + + /* Firmware has 5bit length field and 0 means 32bits */ + if (mlen == 4) + mlen = 0; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, mfield); + MLX5_SET(set_action_in, modact, offset, moffset * 8); + MLX5_SET(set_action_in, modact, length, mlen * 8); + MLX5_SET(set_action_in, modact, data, data); + mod_hdr_acts->num_actions++; + + return 0; +} + struct mlx5e_hairpin { struct mlx5_hairpin *pair; @@ -210,8 +320,6 @@ struct mlx5e_mod_hdr_entry { int compl_result; }; -#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) - static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); @@ -361,10 +469,10 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, struct mod_hdr_key key; u32 hash_key; - num_actions = parse_attr->num_mod_hdr_actions; + num_actions = parse_attr->mod_hdr_acts.num_actions; actions_size = MLX5_MH_ACT_SZ * num_actions; - key.actions = parse_attr->mod_hdr_actions; + key.actions = parse_attr->mod_hdr_acts.actions; key.num_actions = num_actions; hash_key = hash_mod_hdr_info(&key); @@ -954,7 +1062,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); flow_act.modify_hdr = attr->modify_hdr; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1043,8 +1151,16 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr) { + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; struct mlx5_flow_handle *rule; + if (flow_flag_test(flow, CT)) { + mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; + + return mlx5_tc_ct_flow_offload(flow->priv, flow, spec, 
attr, + mod_hdr_acts); + } + rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); if (IS_ERR(rule)) return rule; @@ -1063,10 +1179,15 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, static void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, - struct mlx5_esw_flow_attr *attr) + struct mlx5_esw_flow_attr *attr) { flow_flag_clear(flow, OFFLOADED); + if (flow_flag_test(flow, CT)) { + mlx5_tc_ct_delete_flow(flow->priv, flow, attr); + return; + } + if (attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); @@ -1076,17 +1197,17 @@ mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, static struct mlx5_flow_handle * mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, - struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *slow_attr) + struct mlx5_flow_spec *spec) { + struct mlx5_esw_flow_attr slow_attr; struct mlx5_flow_handle *rule; - memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); - slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - slow_attr->split_count = 0; - slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + memcpy(&slow_attr, flow->esw_attr, sizeof(slow_attr)); + slow_attr.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr.split_count = 0; + slow_attr.flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; - rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, &slow_attr); if (!IS_ERR(rule)) flow_flag_set(flow, SLOW); @@ -1095,14 +1216,15 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, static void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, - struct mlx5e_tc_flow *flow, - struct mlx5_esw_flow_attr *slow_attr) + struct mlx5e_tc_flow *flow) { - memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); - slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - slow_attr->split_count = 0; - slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; - mlx5e_tc_unoffload_fdb_rules(esw, flow, 
slow_attr); + struct mlx5_esw_flow_attr slow_attr; + + memcpy(&slow_attr, flow->esw_attr, sizeof(slow_attr)); + slow_attr.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr.split_count = 0; + slow_attr.flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + mlx5e_tc_unoffload_fdb_rules(esw, flow, &slow_attr); flow_flag_clear(flow, SLOW); } @@ -1173,7 +1295,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, int out_index; if (!mlx5_esw_chains_prios_supported(esw) && attr->prio != 1) { - NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW"); + NL_SET_ERR_MSG_MOD(extack, + "E-switch priorities unsupported, upgrade FW"); return -EOPNOTSUPP; } @@ -1184,13 +1307,15 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, */ max_chain = mlx5_esw_chains_get_chain_range(esw); if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) { - NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); + NL_SET_ERR_MSG_MOD(extack, + "Requested chain is out of supported range"); return -EOPNOTSUPP; } max_prio = mlx5_esw_chains_get_prio_range(esw); if (attr->prio > max_prio) { - NL_SET_ERR_MSG(extack, "Requested priority is out of supported range"); + NL_SET_ERR_MSG_MOD(extack, + "Requested priority is out of supported range"); return -EOPNOTSUPP; } @@ -1220,7 +1345,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1237,14 +1362,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, * (1) there's no error * (2) there's an encap action and we don't have valid neigh */ - if (!encap_valid) { - /* continue with goto slow path rule instead */ - struct mlx5_esw_flow_attr slow_attr; - - flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr); - } else { + if (!encap_valid) + flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, 
flow, &parse_attr->spec); + else flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); - } if (IS_ERR(flow->rule[0])) return PTR_ERR(flow->rule[0]); @@ -1272,9 +1393,10 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; - struct mlx5_esw_flow_attr slow_attr; int out_index; + mlx5e_put_flow_tunnel_id(flow); + if (flow_flag_test(flow, NOT_READY)) { remove_unready_flow(flow); kvfree(attr->parse_attr); @@ -1283,7 +1405,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (mlx5e_is_offloaded_flow(flow)) { if (flow_flag_test(flow, SLOW)) - mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); + mlx5e_tc_unoffload_from_slow_path(esw, flow); else mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); } @@ -1312,7 +1434,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct list_head *flow_list) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr slow_attr, *esw_attr; + struct mlx5_esw_flow_attr *esw_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; @@ -1365,7 +1487,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, continue; } - mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); + mlx5e_tc_unoffload_from_slow_path(esw, flow); flow->rule[0] = rule; /* was unset when slow path rule removed */ flow_flag_set(flow, OFFLOADED); @@ -1377,7 +1499,6 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct list_head *flow_list) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr slow_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; @@ -1389,7 +1510,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, spec = &flow->esw_attr->parse_attr->spec; /* update from encap rule to slow path rule */ - rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, 
&slow_attr); + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); /* mark the flow's encap dest as non-valid */ flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; @@ -1664,150 +1785,272 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, } } +static int flow_has_tc_fwd_action(struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_action *flow_action = &rule->action; + const struct flow_action_entry *act; + int i; -static int parse_tunnel_attr(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct flow_cls_offload *f, - struct net_device *filter_dev, u8 *match_level) + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_GOTO: + return true; + default: + continue; + } + } + + return false; +} + +static int +enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv, + struct flow_dissector_key_enc_opts *opts, + struct netlink_ext_ack *extack, + bool *dont_care) +{ + struct geneve_opt *opt; + int off = 0; + + *dont_care = true; + + while (opts->len > off) { + opt = (struct geneve_opt *)&opts->data[off]; + + if (!(*dont_care) || opt->opt_class || opt->type || + memchr_inv(opt->opt_data, 0, opt->length * 4)) { + *dont_care = false; + + if (opt->opt_class != U16_MAX || + opt->type != U8_MAX || + memchr_inv(opt->opt_data, 0xFF, + opt->length * 4)) { + NL_SET_ERR_MSG(extack, + "Partial match of tunnel options in chain > 0 isn't supported"); + netdev_warn(priv->netdev, + "Partial match of tunnel options in chain > 0 isn't supported"); + return -EOPNOTSUPP; + } + } + + off += sizeof(struct geneve_opt) + opt->length * 4; + } + + return 0; +} + +#define COPY_DISSECTOR(rule, diss_key, dst)\ +({ \ + struct flow_rule *__rule = (rule);\ + typeof(dst) __dst = dst;\ +\ + memcpy(__dst,\ + skb_flow_dissector_target(__rule->match.dissector,\ + diss_key,\ + __rule->match.key),\ + sizeof(*__dst));\ +}) + +static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv 
*priv, + struct mlx5e_tc_flow *flow, + struct flow_cls_offload *f, + struct net_device *filter_dev) { - struct netlink_ext_ack *extack = f->common.extack; - void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers); - void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers); struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; + struct flow_match_enc_opts enc_opts_match; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct tunnel_match_key tunnel_key; + bool enc_opts_is_dont_care = true; + u32 tun_id, enc_opts_id = 0; + struct mlx5_eswitch *esw; + u32 value, mask; int err; - err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, - headers_c, headers_v, match_level); - if (err) { - NL_SET_ERR_MSG_MOD(extack, - "failed to parse tunnel attributes"); + esw = priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + memset(&tunnel_key, 0, sizeof(tunnel_key)); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, + &tunnel_key.enc_control); + if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + &tunnel_key.enc_ipv4); + else + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + &tunnel_key.enc_ipv6); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, + &tunnel_key.enc_tp); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, + &tunnel_key.enc_key_id); + tunnel_key.filter_ifindex = filter_dev->ifindex; + + err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id); + if (err) return err; - } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_match_control match; 
- u16 addr_type; + flow_rule_match_enc_opts(rule, &enc_opts_match); + err = enc_opts_is_dont_care_or_full_match(priv, + enc_opts_match.mask, + extack, + &enc_opts_is_dont_care); + if (err) + goto err_enc_opts; - flow_rule_match_enc_control(rule, &match); - addr_type = match.key->addr_type; + if (!enc_opts_is_dont_care) { + err = mapping_add(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_match.key, &enc_opts_id); + if (err) + goto err_enc_opts; + } - /* For tunnel addr_type used same key id`s as for non-tunnel */ - if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_match_ipv4_addrs match; + value = tun_id << ENC_OPTS_BITS | enc_opts_id; + mask = enc_opts_id ? TUNNEL_ID_MASK : + (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK); - flow_rule_match_enc_ipv4_addrs(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(match.mask->src)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(match.key->src)); + if (attr->chain) { + mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec, + TUNNEL_TO_REG, value, mask); + } else { + mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; + err = mlx5e_tc_match_to_reg_set(priv->mdev, + mod_hdr_acts, + TUNNEL_TO_REG, value); + if (err) + goto err_set; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(match.mask->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(match.key->dst)); - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, - ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ETH_P_IP); - } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_match_ipv6_addrs match; - - flow_rule_match_enc_ipv6_addrs(rule, &match); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, 
headers_v, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, - ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ETH_P_IPV6); - } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { - struct flow_match_ip match; + flow->tunnel_id = value; + return 0; - flow_rule_match_enc_ip(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, - match.mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, - match.key->tos & 0x3); +err_set: + if (enc_opts_id) + mapping_remove(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id); +err_enc_opts: + mapping_remove(uplink_priv->tunnel_mapping, tun_id); + return err; +} - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, - match.mask->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, - match.key->tos >> 2); +static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) +{ + u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK; + u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5_eswitch *esw; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, - match.mask->ttl); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, - match.key->ttl); + esw = flow->priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + if (tun_id) + mapping_remove(uplink_priv->tunnel_mapping, tun_id); + if (enc_opts_id) + 
mapping_remove(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id); +} - if (match.mask->ttl && - !MLX5_CAP_ESW_FLOWTABLE_FDB - (priv->mdev, - ft_field_support.outer_ipv4_ttl)) { +u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow) +{ + return flow->tunnel_id; +} + +static int parse_tunnel_attr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct net_device *filter_dev, + u8 *match_level, + bool *match_inner) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct netlink_ext_ack *extack = f->common.extack; + bool needs_mapping, sets_mapping; + int err; + + if (!mlx5e_is_eswitch_flow(flow)) + return -EOPNOTSUPP; + + needs_mapping = !!flow->esw_attr->chain; + sets_mapping = !flow->esw_attr->chain && flow_has_tc_fwd_action(f); + *match_inner = !needs_mapping; + + if ((needs_mapping || sets_mapping) && + !mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + NL_SET_ERR_MSG(extack, + "Chains on tunnel devices isn't supported without register loopback support"); + netdev_warn(priv->netdev, + "Chains on tunnel devices isn't supported without register loopback support"); + return -EOPNOTSUPP; + } + + if (!flow->esw_attr->chain) { + err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, + match_level); + if (err) { NL_SET_ERR_MSG_MOD(extack, - "Matching on TTL is not supported"); - return -EOPNOTSUPP; + "Failed to parse tunnel attributes"); + netdev_warn(priv->netdev, + "Failed to parse tunnel attributes"); + return err; } + flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; } - /* Enforce DMAC when offloading incoming tunneled flows. - * Flow counters require a match on the DMAC. 
- */ - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dmac_47_16), priv->netdev->dev_addr); + if (!needs_mapping && !sets_mapping) + return 0; - /* let software handle IP fragments */ - MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev); +} - return 0; +static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + inner_headers); } -static void *get_match_headers_criteria(u32 flags, - struct mlx5_flow_spec *spec) +static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec) { - return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? - MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - inner_headers) : - MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers); + return MLX5_ADDR_OF(fte_match_param, spec->match_value, + inner_headers); +} + +static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); +} + +static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); } static void *get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec) { return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? - MLX5_ADDR_OF(fte_match_param, spec->match_value, - inner_headers) : - MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers); + get_match_inner_headers_value(spec) : + get_match_outer_headers_value(spec); +} + +static void *get_match_headers_criteria(u32 flags, + struct mlx5_flow_spec *spec) +{ + return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? 
+ get_match_inner_headers_criteria(spec) : + get_match_outer_headers_criteria(spec); } static int mlx5e_flower_parse_meta(struct net_device *filter_dev, @@ -1845,6 +2088,7 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev, } static int __parse_cls_flower(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct net_device *filter_dev, @@ -1885,6 +2129,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | BIT(FLOW_DISSECTOR_KEY_TCP) | BIT(FLOW_DISSECTOR_KEY_IP) | + BIT(FLOW_DISSECTOR_KEY_CT) | BIT(FLOW_DISSECTOR_KEY_ENC_IP) | BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); @@ -1894,18 +2139,22 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } if (mlx5e_get_tc_tun(filter_dev)) { - if (parse_tunnel_attr(priv, spec, f, filter_dev, - outer_match_level)) - return -EOPNOTSUPP; + bool match_inner = false; - /* At this point, header pointers should point to the inner - * headers, outer header were already set by parse_tunnel_attr - */ - match_level = inner_match_level; - headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP, - spec); - headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP, - spec); + err = parse_tunnel_attr(priv, flow, spec, f, filter_dev, + outer_match_level, &match_inner); + if (err) + return err; + + if (match_inner) { + /* header pointers should point to the inner headers + * if the packet was decapsulated already. + * outer headers are set by parse_tunnel_attr. 
+ */ + match_level = inner_match_level; + headers_c = get_match_inner_headers_criteria(spec); + headers_v = get_match_inner_headers_value(spec); + } } err = mlx5e_flower_parse_meta(filter_dev, f); @@ -2222,8 +2471,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, inner_match_level = MLX5_MATCH_NONE; outer_match_level = MLX5_MATCH_NONE; - err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level, - &outer_match_level); + err = __parse_cls_flower(priv, flow, spec, f, filter_dev, + &inner_match_level, &outer_match_level); non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? outer_match_level : inner_match_level; @@ -2383,25 +2632,26 @@ static struct mlx5_fields fields[] = { OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), }; -/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at - * max from the SW pedit action. On success, attr->num_mod_hdr_actions - * says how many HW actions were actually parsed. - */ -static int offload_pedit_fields(struct pedit_headers_action *hdrs, +static int offload_pedit_fields(struct mlx5e_priv *priv, + int namespace, + struct pedit_headers_action *hdrs, struct mlx5e_tc_flow_parse_attr *parse_attr, u32 *action_flags, struct netlink_ext_ack *extack) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; - int i, action_size, nactions, max_actions, first, last, next_z; + int i, action_size, first, last, next_z; void *headers_c, *headers_v, *action, *vals_p; u32 *s_masks_p, *a_masks_p, s_mask, a_mask; + struct mlx5e_tc_mod_hdr_acts *mod_acts; struct mlx5_fields *f; unsigned long mask; __be32 mask_be32; __be16 mask_be16; + int err; u8 cmd; + mod_acts = &parse_attr->mod_hdr_acts; headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec); headers_v = get_match_headers_value(*action_flags, &parse_attr->spec); @@ -2411,11 +2661,6 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, add_vals = &hdrs[1].vals; action_size = 
MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - action = parse_attr->mod_hdr_actions + - parse_attr->num_mod_hdr_actions * action_size; - - max_actions = parse_attr->max_mod_hdr_actions; - nactions = parse_attr->num_mod_hdr_actions; for (i = 0; i < ARRAY_SIZE(fields); i++) { bool skip; @@ -2441,13 +2686,6 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, return -EOPNOTSUPP; } - if (nactions == max_actions) { - NL_SET_ERR_MSG_MOD(extack, - "too many pedit actions, can't offload"); - printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions); - return -EOPNOTSUPP; - } - skip = false; if (s_mask) { void *match_mask = headers_c + f->match_offset; @@ -2495,6 +2733,18 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, return -EOPNOTSUPP; } + err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "too many pedit actions, can't offload"); + mlx5_core_warn(priv->mdev, + "mlx5: parsed %d pedit actions, can't do more\n", + mod_acts->num_actions); + return err; + } + + action = mod_acts->actions + + (mod_acts->num_actions * action_size); MLX5_SET(set_action_in, action, action_type, cmd); MLX5_SET(set_action_in, action, field, f->field); @@ -2517,11 +2767,9 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, else if (f->field_bsize == 8) MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first); - action += action_size; - nactions++; + ++mod_acts->num_actions; } - parse_attr->num_mod_hdr_actions = nactions; return 0; } @@ -2534,34 +2782,52 @@ static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev, return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions); } -static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, - struct pedit_headers_action *hdrs, - int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr) +int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, + int namespace, + struct 
mlx5e_tc_mod_hdr_acts *mod_hdr_acts) { - int nkeys, action_size, max_actions; + int action_size, new_num_actions, max_hw_actions; + size_t new_sz, old_sz; + void *ret; - nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits + - hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits; - action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions) + return 0; - max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace); - /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */ - max_actions = min(max_actions, nkeys * 16); + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL); - if (!parse_attr->mod_hdr_actions) + max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev, + namespace); + new_num_actions = min(max_hw_actions, + mod_hdr_acts->actions ? + mod_hdr_acts->max_actions * 2 : 1); + if (mod_hdr_acts->max_actions == new_num_actions) + return -ENOSPC; + + new_sz = action_size * new_num_actions; + old_sz = mod_hdr_acts->max_actions * action_size; + ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL); + if (!ret) return -ENOMEM; - parse_attr->max_mod_hdr_actions = max_actions; + memset(ret + old_sz, 0, new_sz - old_sz); + mod_hdr_acts->actions = ret; + mod_hdr_acts->max_actions = new_num_actions; + return 0; } +void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + kfree(mod_hdr_acts->actions); + mod_hdr_acts->actions = NULL; + mod_hdr_acts->num_actions = 0; + mod_hdr_acts->max_actions = 0; +} + static const struct pedit_headers zero_masks = {}; static int parse_tc_pedit_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { @@ -2609,13 +2875,8 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, int err; u8 
cmd; - if (!parse_attr->mod_hdr_actions) { - err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr); - if (err) - goto out_err; - } - - err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack); + err = offload_pedit_fields(priv, namespace, hdrs, parse_attr, + action_flags, extack); if (err < 0) goto out_dealloc_parsed_actions; @@ -2635,8 +2896,7 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, return 0; out_dealloc_parsed_actions: - kfree(parse_attr->mod_hdr_actions); -out_err: + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); return err; } @@ -2681,7 +2941,9 @@ struct ipv6_hoplimit_word { __u8 hop_limit; }; -static bool is_action_keys_supported(const struct flow_action_entry *act) +static int is_action_keys_supported(const struct flow_action_entry *act, + bool ct_flow, bool *modify_ip_header, + struct netlink_ext_ack *extack) { u32 mask, offset; u8 htype; @@ -2700,7 +2962,13 @@ static bool is_action_keys_supported(const struct flow_action_entry *act) if (offset != offsetof(struct iphdr, ttl) || ttl_word->protocol || ttl_word->check) { - return true; + *modify_ip_header = true; + } + + if (ct_flow && offset >= offsetof(struct iphdr, saddr)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of ipv4 address with action ct"); + return -EOPNOTSUPP; } } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { struct ipv6_hoplimit_word *hoplimit_word = @@ -2709,15 +2977,27 @@ static bool is_action_keys_supported(const struct flow_action_entry *act) if (offset != offsetof(struct ipv6hdr, payload_len) || hoplimit_word->payload_len || hoplimit_word->nexthdr) { - return true; + *modify_ip_header = true; } + + if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of ipv6 address with action ct"); + return -EOPNOTSUPP; + } + } else if (ct_flow && (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP || + htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP)) { + NL_SET_ERR_MSG_MOD(extack, + "can't 
offload re-write of transport header ports with action ct"); + return -EOPNOTSUPP; } - return false; + + return 0; } static bool modify_header_match_supported(struct mlx5_flow_spec *spec, struct flow_action *flow_action, - u32 actions, + u32 actions, bool ct_flow, struct netlink_ext_ack *extack) { const struct flow_action_entry *act; @@ -2725,7 +3005,7 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, void *headers_v; u16 ethertype; u8 ip_proto; - int i; + int i, err; headers_v = get_match_headers_value(actions, spec); ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); @@ -2740,10 +3020,10 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, act->id != FLOW_ACTION_ADD) continue; - if (is_action_keys_supported(act)) { - modify_ip_header = true; - break; - } + err = is_action_keys_supported(act, ct_flow, + &modify_ip_header, extack); + if (err) + return err; } ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); @@ -2765,23 +3045,29 @@ static bool actions_match_supported(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + bool ct_flow; u32 actions; - if (mlx5e_is_eswitch_flow(flow)) + ct_flow = flow_flag_test(flow, CT); + if (mlx5e_is_eswitch_flow(flow)) { actions = flow->esw_attr->action; - else - actions = flow->nic_attr->action; - if (flow_flag_test(flow, EGRESS) && - !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) || - (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || - (actions & MLX5_FLOW_CONTEXT_ACTION_DROP))) - return false; + if (flow->esw_attr->split_count && ct_flow) { + /* All registers used by ct are cleared when using + * split rules. 
+ */ + NL_SET_ERR_MSG_MOD(extack, + "Can't offload mirroring with action ct"); + return false; + } + } else { + actions = flow->nic_attr->action; + } if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) return modify_header_match_supported(&parse_attr->spec, flow_action, actions, - extack); + ct_flow, extack); return true; } @@ -2837,8 +3123,7 @@ static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace, return -EOPNOTSUPP; } - err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, - hdrs, NULL); + err = parse_tc_pedit_action(priv, &pedit_act, namespace, hdrs, NULL); *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; return err; @@ -2883,6 +3168,10 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, if (!flow_action_has_entries(flow_action)) return -EINVAL; + if (!flow_action_hw_stats_check(flow_action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) + return -EOPNOTSUPP; + attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; flow_action_for_each(i, act, flow_action) { @@ -2900,7 +3189,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, case FLOW_ACTION_MANGLE: case FLOW_ACTION_ADD: err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL, - parse_attr, hdrs, extack); + hdrs, extack); if (err) return err; @@ -2969,9 +3258,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, /* in case all pedit actions are skipped, remove the MOD_HDR * flag. 
*/ - if (parse_attr->num_mod_hdr_actions == 0) { + if (parse_attr->mod_hdr_acts.num_actions == 0) { action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); } } @@ -3314,6 +3603,85 @@ static bool is_duplicated_output_device(struct net_device *dev, return false; } +static int mlx5_validate_goto_chain(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + const struct flow_action_entry *act, + u32 actions, + struct netlink_ext_ack *extack) +{ + u32 max_chain = mlx5_esw_chains_get_chain_range(esw); + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + bool ft_flow = mlx5e_is_ft_flow(flow); + u32 dest_chain = act->chain_index; + + if (ft_flow) { + NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); + return -EOPNOTSUPP; + } + + if (!mlx5_esw_chains_backwards_supported(esw) && + dest_chain <= attr->chain) { + NL_SET_ERR_MSG_MOD(extack, + "Goto lower numbered chain isn't supported"); + return -EOPNOTSUPP; + } + if (dest_chain > max_chain) { + NL_SET_ERR_MSG_MOD(extack, + "Requested destination chain is out of supported range"); + return -EOPNOTSUPP; + } + + if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_DECAP) && + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_and_fwd_to_table)) { + NL_SET_ERR_MSG_MOD(extack, + "Goto chain is not allowed if action has reformat or decap"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int verify_uplink_forwarding(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct net_device *out_dev, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_rep_priv *rep_priv; + + /* Forwarding non encapsulated traffic between + * uplink ports is allowed only if + * termination_table_raw_traffic cap is set. + * + * Input vport was stored esw_attr->in_rep. 
+ * In LAG case, *priv* is the private data of + * uplink which may be not the input vport. + */ + rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep); + + if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) && + mlx5e_eswitch_uplink_rep(out_dev))) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, + termination_table_raw_traffic)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are both uplink, can't offload forwarding"); + pr_err("devices %s %s are both uplink, can't offload forwarding\n", + priv->netdev->name, out_dev->name); + return -EOPNOTSUPP; + } else if (out_dev != rep_priv->netdev) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not the same uplink, can't offload forwarding"); + pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n", + priv->netdev->name, out_dev->name); + return -EOPNOTSUPP; + } + return 0; +} + static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, @@ -3328,13 +3696,17 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; bool ft_flow = mlx5e_is_ft_flow(flow); const struct flow_action_entry *act; + bool encap = false, decap = false; + u32 action = attr->action; int err, i, if_count = 0; - bool encap = false; - u32 action = 0; if (!flow_action_has_entries(flow_action)) return -EINVAL; + if (!flow_action_hw_stats_check(flow_action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) + return -EOPNOTSUPP; + flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_DROP: @@ -3344,7 +3716,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, case FLOW_ACTION_MANGLE: case FLOW_ACTION_ADD: err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB, - parse_attr, hdrs, extack); + hdrs, extack); if (err) return err; @@ -3382,8 +3754,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { NL_SET_ERR_MSG_MOD(extack, "can't support more 
output ports, can't offload forwarding"); - pr_err("can't support more than %d output ports, can't offload forwarding\n", - attr->out_count); + netdev_warn(priv->netdev, + "can't support more than %d output ports, can't offload forwarding\n", + attr->out_count); return -EOPNOTSUPP; } @@ -3441,11 +3814,17 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return err; } + err = verify_uplink_forwarding(priv, flow, out_dev, extack); + if (err) + return err; + if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding"); - pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", - priv->netdev->name, out_dev->name); + netdev_warn(priv->netdev, + "devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, + out_dev->name); return -EOPNOTSUPP; } @@ -3464,8 +3843,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } else { NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding"); - pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", - priv->netdev->name, out_dev->name); + netdev_warn(priv->netdev, + "devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, + out_dev->name); return -EINVAL; } } @@ -3507,28 +3888,24 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, attr->split_count = attr->out_count; break; case FLOW_ACTION_TUNNEL_DECAP: - action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + decap = true; break; - case FLOW_ACTION_GOTO: { - u32 dest_chain = act->chain_index; - u32 max_chain = mlx5_esw_chains_get_chain_range(esw); + case FLOW_ACTION_GOTO: + err = mlx5_validate_goto_chain(esw, flow, act, action, + extack); + if (err) + return err; - if (ft_flow) { - NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); - return -EOPNOTSUPP; - } - if (dest_chain <= attr->chain) { - NL_SET_ERR_MSG(extack, "Goto earlier chain isn't 
supported"); - return -EOPNOTSUPP; - } - if (dest_chain > max_chain) { - NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range"); - return -EOPNOTSUPP; - } action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - attr->dest_chain = dest_chain; + attr->dest_chain = act->chain_index; + break; + case FLOW_ACTION_CT: + err = mlx5_tc_ct_parse_action(priv, attr, act, extack); + if (err) + return err; + + flow_flag_set(flow, CT); break; - } default: NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); return -EOPNOTSUPP; @@ -3557,9 +3934,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, * flag. we might have set split_count either by pedit or * pop/push. if there is no pop/push either, reset it too. */ - if (parse_attr->num_mod_hdr_actions == 0) { + if (parse_attr->mod_hdr_acts.num_actions == 0) { action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) attr->split_count = 0; @@ -3571,8 +3948,25 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; if (attr->dest_chain) { + if (decap) { + /* It can be supported if we'll create a mapping for + * the tunnel device only (without tunnel), and set + * this tunnel id with this decap flow. + * + * On restore (miss), we'll just set this saved tunnel + * device. 
+ */ + + NL_SET_ERR_MSG(extack, + "Decap with goto isn't supported"); + netdev_warn(priv->netdev, + "Decap with goto isn't supported"); + return -EOPNOTSUPP; + } + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported"); + NL_SET_ERR_MSG_MOD(extack, + "Mirroring goto chain rules isn't supported"); return -EOPNOTSUPP; } attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; @@ -3580,7 +3974,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (!(attr->action & (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { - NL_SET_ERR_MSG(extack, "Rule must have at least one forward/drop action"); + NL_SET_ERR_MSG_MOD(extack, + "Rule must have at least one forward/drop action"); return -EOPNOTSUPP; } @@ -3751,6 +4146,10 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; + err = mlx5_tc_ct_parse_match(priv, &parse_attr->spec, f, extack); + if (err) + goto err_free; + err = mlx5e_tc_add_fdb_flow(priv, flow, extack); complete_all(&flow->init_done); if (err) { @@ -4035,7 +4434,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, goto errout; } - if (mlx5e_is_offloaded_flow(flow)) { + if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) { counter = mlx5e_tc_get_counter(flow); if (!counter) goto errout; @@ -4069,7 +4468,8 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, no_peer_counter: mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); out: - flow_stats_update(&f->stats, bytes, packets, lastuse); + flow_stats_update(&f->stats, bytes, packets, lastuse, + FLOW_ACTION_HW_STATS_DELAYED); trace_mlx5e_stats_flower(f); errout: mlx5e_flow_put(priv, flow); @@ -4126,6 +4526,9 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + if (!flow_action_basic_hw_stats_check(flow_action, extack)) + return -EOPNOTSUPP; + flow_action_for_each(i, act, flow_action) { switch 
(act->id) { case FLOW_ACTION_POLICE: @@ -4147,8 +4550,14 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, struct tc_cls_matchall_offload *ma) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct netlink_ext_ack *extack = ma->common.extack; + if (!mlx5_esw_qos_enabled(esw)) { + NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device"); + return -EOPNOTSUPP; + } + if (ma->common.prio != 1) { NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); return -EINVAL; @@ -4177,7 +4586,8 @@ void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; rpriv->prev_vf_vport_stats = cur_stats; - flow_stats_update(&ma->stats, dpkts, dbytes, jiffies); + flow_stats_update(&ma->stats, dpkts, dbytes, jiffies, + FLOW_ACTION_HW_STATS_DELAYED); } static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, @@ -4295,12 +4705,63 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) int mlx5e_tc_esw_init(struct rhashtable *tc_ht) { - return rhashtable_init(tc_ht, &tc_ht_params); + const size_t sz_enc_opts = sizeof(struct flow_dissector_key_enc_opts); + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *priv; + struct mapping_ctx *mapping; + int err; + + uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); + priv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + + err = mlx5_tc_ct_init(uplink_priv); + if (err) + goto err_ct; + + mapping = mapping_create(sizeof(struct tunnel_match_key), + TUNNEL_INFO_BITS_MASK, true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto err_tun_mapping; + } + uplink_priv->tunnel_mapping = mapping; + + mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto err_enc_opts_mapping; + } + 
uplink_priv->tunnel_enc_opts_mapping = mapping; + + err = rhashtable_init(tc_ht, &tc_ht_params); + if (err) + goto err_ht_init; + + return err; + +err_ht_init: + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); +err_enc_opts_mapping: + mapping_destroy(uplink_priv->tunnel_mapping); +err_tun_mapping: + mlx5_tc_ct_clean(uplink_priv); +err_ct: + netdev_warn(priv->netdev, + "Failed to initialize tc (eswitch), err: %d", err); + return err; } void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) { + struct mlx5_rep_uplink_priv *uplink_priv; + rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); + + uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); + mapping_destroy(uplink_priv->tunnel_mapping); + + mlx5_tc_ct_clean(uplink_priv); } int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) @@ -4332,3 +4793,147 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work) } mutex_unlock(&rpriv->unready_flows_lock); } + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv, + u32 tunnel_id) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct flow_dissector_key_enc_opts enc_opts = {}; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct metadata_dst *tun_dst; + struct tunnel_match_key key; + u32 tun_id, enc_opts_id; + struct net_device *dev; + int err; + + enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; + tun_id = tunnel_id >> ENC_OPTS_BITS; + + if (!tun_id) + return true; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); + if (err) { + WARN_ON_ONCE(true); + netdev_dbg(priv->netdev, + "Couldn't find tunnel for tun_id: %d, err: %d\n", + tun_id, err); + return false; + } + + if (enc_opts_id) { + err 
= mapping_find(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id, &enc_opts); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", + enc_opts_id, err); + return false; + } + } + + tun_dst = tun_rx_dst(enc_opts.len); + if (!tun_dst) { + WARN_ON_ONCE(true); + return false; + } + + ip_tunnel_key_init(&tun_dst->u.tun_info.key, + key.enc_ipv4.src, key.enc_ipv4.dst, + key.enc_ip.tos, key.enc_ip.ttl, + 0, /* label */ + key.enc_tp.src, key.enc_tp.dst, + key32_to_tunnel_id(key.enc_key_id.keyid), + TUNNEL_KEY); + + if (enc_opts.len) + ip_tunnel_info_opts_set(&tun_dst->u.tun_info, enc_opts.data, + enc_opts.len, enc_opts.dst_opt_type); + + skb_dst_set(skb, (struct dst_entry *)tun_dst); + dev = dev_get_by_index(&init_net, key.filter_ifindex); + if (!dev) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel device with ifindex: %d\n", + key.filter_ifindex); + return false; + } + + /* Set tun_dev so we do dev_put() after datapath */ + tc_priv->tun_dev = dev; + + skb->dev = dev; + + return true; +} +#endif /* CONFIG_NET_TC_SKB_EXT */ + +bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, + struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv) +{ +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct tc_skb_ext *tc_skb_ext; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + int tunnel_moffset; + int err; + + reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); + if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) + reg_c0 = 0; + reg_c1 = be32_to_cpu(cqe->imm_inval_pkey); + + if (!reg_c0) + return true; + + priv = netdev_priv(skb->dev); + esw = priv->mdev->priv.eswitch; + + err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find chain for chain tag: %d, err: %d\n", + reg_c0, err); + return false; + } + + if (chain) { + tc_skb_ext = 
skb_ext_add(skb, TC_SKB_EXT); + if (!tc_skb_ext) { + WARN_ON(1); + return false; + } + + tc_skb_ext->chain = chain; + + tuple_id = reg_c1 & TUPLE_ID_MAX; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id)) + return false; + } + + tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset; + tunnel_id = reg_c1 >> (8 * tunnel_moffset); + return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); +#endif /* CONFIG_NET_TC_SKB_EXT */ + + return true; +} + +void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) +{ + if (tc_priv->tun_dev) + dev_put(tc_priv->tun_dev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 262cdb7b69b1..abdcfa4c4e0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -91,9 +91,63 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags); void mlx5e_tc_reoffload_flows_work(struct work_struct *work); +enum mlx5e_tc_attr_to_reg { + CHAIN_TO_REG, + TUNNEL_TO_REG, + CTSTATE_TO_REG, + ZONE_TO_REG, + MARK_TO_REG, + LABELS_TO_REG, + FTEID_TO_REG, + TUPLEID_TO_REG, +}; + +struct mlx5e_tc_attr_to_reg_mapping { + int mfield; /* rewrite field */ + int moffset; /* offset of mfield */ + int mlen; /* bytes to rewrite/match */ + + int soffset; /* offset of spec for match */ +}; + +extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[]; + bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, struct net_device *out_dev); +struct mlx5e_tc_update_priv { + struct net_device *tun_dev; +}; + +bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv); + +void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv); + +struct mlx5e_tc_mod_hdr_acts { + int num_actions; + int 
max_actions; + void *actions; +}; + +int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + u32 data); + +void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 data, + u32 mask); + +int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, + int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); + +struct mlx5e_tc_flow; +u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow); + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index ee60383adc5b..fd6b2a1898c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -72,8 +72,8 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, { int txq_ix = netdev_pick_tx(dev, skb, NULL); struct mlx5e_priv *priv = netdev_priv(dev); - u16 num_channels; int up = 0; + int ch_ix; if (!netdev_get_num_tc(dev)) return txq_ix; @@ -86,14 +86,13 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, if (skb_vlan_tag_present(skb)) up = skb_vlan_tag_get_prio(skb); - /* txq_ix can be larger than num_channels since - * dev->num_real_tx_queues = num_channels * num_tc + /* Normalize any picked txq_ix to [0, num_channels), + * So we can return a txq_ix that matches the channel and + * packet UP. 
*/ - num_channels = priv->channels.params.num_channels; - if (txq_ix >= num_channels) - txq_ix = priv->txq2sq[txq_ix]->ch_ix; + ch_ix = priv->txq2sq[txq_ix]->ch_ix; - return priv->channel_tc2realtxq[txq_ix][up]; + return priv->channel_tc2realtxq[ch_ix][up]; } static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 800d34ed8a96..87c49e7a164c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -31,6 +31,7 @@ */ #include <linux/irq.h> +#include <linux/indirect_call_wrapper.h> #include "en.h" #include "en/xdp.h" #include "en/xsk/rx.h" @@ -100,7 +101,10 @@ static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskr busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); mlx5e_xsk_update_tx_wakeup(xsksq); - xsk_rx_alloc_err = xskrq->post_wqes(xskrq); + xsk_rx_alloc_err = INDIRECT_CALL_2(xskrq->post_wqes, + mlx5e_post_rx_mpwqes, + mlx5e_post_rx_wqes, + xskrq); busy_xsk |= mlx5e_xsk_update_rx_wakeup(xskrq, xsk_rx_alloc_err); return busy_xsk; @@ -143,7 +147,10 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_poll_ico_cq(&c->icosq.cq); - busy |= rq->post_wqes(rq); + busy |= INDIRECT_CALL_2(rq->post_wqes, + mlx5e_post_rx_mpwqes, + mlx5e_post_rx_wqes, + rq); if (xsk_open) { mlx5e_poll_ico_cq(&c->xskicosq.cq); busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile new file mode 100644 index 000000000000..c78512eed8d7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +subdir-ccflags-y += -I$(src)/.. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c index 4276194b633f..029001040737 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c @@ -5,23 +5,26 @@ #include <linux/mlx5/mlx5_ifc.h> #include <linux/mlx5/fs.h> -#include "eswitch_offloads_chains.h" +#include "esw/chains.h" +#include "en/mapping.h" #include "mlx5_core.h" #include "fs_core.h" #include "eswitch.h" #include "en.h" +#include "en_tc.h" #define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv) #define esw_chains_lock(esw) (esw_chains_priv(esw)->lock) #define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht) +#define esw_chains_mapping(esw) (esw_chains_priv(esw)->chains_mapping) #define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht) #define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left) #define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb) #define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb) #define fdb_ignore_flow_level_supported(esw) \ (MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level)) - -#define ESW_OFFLOADS_NUM_GROUPS 4 +#define fdb_modify_header_fwd_to_table_supported(esw) \ + (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) /* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated @@ -36,6 +39,7 @@ static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024, 1 * 1024 * 1024, 64 * 1024, 128 }; +#define ESW_FT_TBL_SZ (64 * 1024) struct mlx5_esw_chains_priv { struct rhashtable chains_ht; @@ -44,6 +48,7 @@ struct mlx5_esw_chains_priv { struct mutex lock; struct mlx5_flow_table *tc_end_fdb; + struct mapping_ctx *chains_mapping; int fdb_left[ARRAY_SIZE(ESW_POOLS)]; }; @@ -54,9 +59,12 @@ struct fdb_chain { u32 chain; int ref; + int id; struct mlx5_eswitch *esw; struct list_head 
prios_list; + struct mlx5_flow_handle *restore_rule; + struct mlx5_modify_hdr *miss_modify_hdr; }; struct fdb_prio_key { @@ -99,6 +107,12 @@ bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw) return esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; } +bool mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw) +{ + return mlx5_esw_chains_prios_supported(esw) && + fdb_ignore_flow_level_supported(esw); +} + u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw) { if (!mlx5_esw_chains_prios_supported(esw)) @@ -198,7 +212,9 @@ mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw, ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - sz = mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE); + sz = (chain == mlx5_esw_chains_get_ft_chain(esw)) ? + mlx5_esw_chains_get_avail_sz_from_pool(esw, ESW_FT_TBL_SZ) : + mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE); if (!sz) return ERR_PTR(-ENOSPC); ft_attr.max_fte = sz; @@ -234,7 +250,7 @@ mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw, } ft_attr.autogroup.num_reserved_entries = 2; - ft_attr.autogroup.max_num_groups = ESW_OFFLOADS_NUM_GROUPS; + ft_attr.autogroup.max_num_groups = esw->params.large_group_num; fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); if (IS_ERR(fdb)) { esw_warn(esw->dev, @@ -255,6 +271,83 @@ mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw, mlx5_destroy_flow_table(fdb); } +static int +create_fdb_chain_restore(struct fdb_chain *fdb_chain) +{ + char modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)]; + struct mlx5_eswitch *esw = fdb_chain->esw; + struct mlx5_modify_hdr *mod_hdr; + u32 index; + int err; + + if (fdb_chain->chain == mlx5_esw_chains_get_ft_chain(esw) || + !mlx5_esw_chains_prios_supported(esw)) + return 0; + + err = mapping_add(esw_chains_mapping(esw), &fdb_chain->chain, &index); + if (err) + return err; + if (index == MLX5_FS_DEFAULT_FLOW_TAG) { + /* we got the 
special default flow tag id, so we won't know + * if we actually marked the packet with the restore rule + * we create. + * + * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0. + */ + err = mapping_add(esw_chains_mapping(esw), + &fdb_chain->chain, &index); + mapping_remove(esw_chains_mapping(esw), + MLX5_FS_DEFAULT_FLOW_TAG); + if (err) + return err; + } + + fdb_chain->id = index; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mfield); + MLX5_SET(set_action_in, modact, offset, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].moffset * 8); + MLX5_SET(set_action_in, modact, length, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mlen * 8); + MLX5_SET(set_action_in, modact, data, fdb_chain->id); + mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, + 1, modact); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + goto err_mod_hdr; + } + fdb_chain->miss_modify_hdr = mod_hdr; + + fdb_chain->restore_rule = esw_add_restore_rule(esw, fdb_chain->id); + if (IS_ERR(fdb_chain->restore_rule)) { + err = PTR_ERR(fdb_chain->restore_rule); + goto err_rule; + } + + return 0; + +err_rule: + mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); +err_mod_hdr: + /* Datapath can't find this mapping, so we can safely remove it */ + mapping_remove(esw_chains_mapping(esw), fdb_chain->id); + return err; +} + +static void destroy_fdb_chain_restore(struct fdb_chain *fdb_chain) +{ + struct mlx5_eswitch *esw = fdb_chain->esw; + + if (!fdb_chain->miss_modify_hdr) + return; + + mlx5_del_flow_rules(fdb_chain->restore_rule); + mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); + mapping_remove(esw_chains_mapping(esw), fdb_chain->id); +} + static struct fdb_chain * mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) { @@ -269,6 +362,10 @@ mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) 
fdb_chain->chain = chain; INIT_LIST_HEAD(&fdb_chain->prios_list); + err = create_fdb_chain_restore(fdb_chain); + if (err) + goto err_restore; + err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node, chain_params); if (err) @@ -277,6 +374,8 @@ mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) return fdb_chain; err_insert: + destroy_fdb_chain_restore(fdb_chain); +err_restore: kvfree(fdb_chain); return ERR_PTR(err); } @@ -288,6 +387,8 @@ mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain) rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node, chain_params); + + destroy_fdb_chain_restore(fdb_chain); kvfree(fdb_chain); } @@ -310,10 +411,11 @@ mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain) } static struct mlx5_flow_handle * -mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb, +mlx5_esw_chains_add_miss_rule(struct fdb_chain *fdb_chain, + struct mlx5_flow_table *fdb, struct mlx5_flow_table *next_fdb) { - static const struct mlx5_flow_spec spec = {}; + struct mlx5_eswitch *esw = fdb_chain->esw; struct mlx5_flow_destination dest = {}; struct mlx5_flow_act act = {}; @@ -322,7 +424,13 @@ mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb, dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = next_fdb; - return mlx5_add_flow_rules(fdb, &spec, &act, &dest, 1); + if (next_fdb == tc_end_fdb(esw) && + mlx5_esw_chains_prios_supported(esw)) { + act.modify_hdr = fdb_chain->miss_modify_hdr; + act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + + return mlx5_add_flow_rules(fdb, NULL, &act, &dest, 1); } static int @@ -345,7 +453,8 @@ mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio, list_for_each_entry_continue_reverse(pos, &fdb_chain->prios_list, list) { - miss_rules[n] = mlx5_esw_chains_add_miss_rule(pos->fdb, + miss_rules[n] = mlx5_esw_chains_add_miss_rule(fdb_chain, + pos->fdb, next_fdb); if (IS_ERR(miss_rules[n])) { err = PTR_ERR(miss_rules[n]); @@ -459,7 +568,7 @@ 
mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw, } /* Add miss rule to next_fdb */ - miss_rule = mlx5_esw_chains_add_miss_rule(fdb, next_fdb); + miss_rule = mlx5_esw_chains_add_miss_rule(fdb_chain, fdb, next_fdb); if (IS_ERR(miss_rule)) { err = PTR_ERR(miss_rule); goto err_miss_rule; @@ -618,12 +727,44 @@ mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) return tc_end_fdb(esw); } +struct mlx5_flow_table * +mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw) +{ + u32 chain, prio, level; + int err; + + if (!fdb_ignore_flow_level_supported(esw)) { + err = -EOPNOTSUPP; + + esw_warn(esw->dev, + "Couldn't create global flow table, ignore_flow_level not supported."); + goto err_ignore; + } + + chain = mlx5_esw_chains_get_chain_range(esw), + prio = mlx5_esw_chains_get_prio_range(esw); + level = mlx5_esw_chains_get_level_range(esw); + + return mlx5_esw_chains_create_fdb_table(esw, chain, prio, level); + +err_ignore: + return ERR_PTR(err); +} + +void +mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft) +{ + mlx5_esw_chains_destroy_fdb_table(esw, ft); +} + static int mlx5_esw_chains_init(struct mlx5_eswitch *esw) { struct mlx5_esw_chains_priv *chains_priv; struct mlx5_core_dev *dev = esw->dev; u32 max_flow_counter, fdb_max; + struct mapping_ctx *mapping; int err; chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); @@ -637,7 +778,7 @@ mlx5_esw_chains_init(struct mlx5_eswitch *esw) esw_debug(dev, "Init esw offloads chains, max counters(%d), groups(%d), max flow table size(%d)\n", - max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, fdb_max); + max_flow_counter, esw->params.large_group_num, fdb_max); mlx5_esw_chains_init_sz_pool(esw); @@ -645,6 +786,16 @@ mlx5_esw_chains_init(struct mlx5_eswitch *esw) esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); + } 
else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported\n"); + } else if (!fdb_modify_header_fwd_to_table_supported(esw)) { + /* Disabled when ttl workaround is needed, e.g + * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig + */ + esw_warn(dev, + "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n"); + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; } else { esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; esw_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", @@ -660,10 +811,20 @@ mlx5_esw_chains_init(struct mlx5_eswitch *esw) if (err) goto init_prios_ht_err; + mapping = mapping_create(sizeof(u32), esw_get_max_restore_tag(esw), + true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto mapping_err; + } + esw_chains_mapping(esw) = mapping; + mutex_init(&esw_chains_lock(esw)); return 0; +mapping_err: + rhashtable_destroy(&esw_prios_ht(esw)); init_prios_ht_err: rhashtable_destroy(&esw_chains_ht(esw)); init_chains_ht_err: @@ -675,6 +836,7 @@ static void mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw) { mutex_destroy(&esw_chains_lock(esw)); + mapping_destroy(esw_chains_mapping(esw)); rhashtable_destroy(&esw_prios_ht(esw)); rhashtable_destroy(&esw_chains_ht(esw)); @@ -704,12 +866,9 @@ mlx5_esw_chains_open(struct mlx5_eswitch *esw) /* Open level 1 for split rules now if prios isn't supported */ if (!mlx5_esw_chains_prios_supported(esw)) { - ft = mlx5_esw_chains_get_table(esw, 0, 1, 1); - - if (IS_ERR(ft)) { - err = PTR_ERR(ft); + err = mlx5_esw_vport_tbl_get(esw); + if (err) goto level_1_err; - } } return 0; @@ -725,7 +884,7 @@ static void mlx5_esw_chains_close(struct mlx5_eswitch *esw) { if (!mlx5_esw_chains_prios_supported(esw)) - mlx5_esw_chains_put_table(esw, 0, 1, 1); + mlx5_esw_vport_tbl_put(esw); mlx5_esw_chains_put_table(esw, 0, 1, 
0); mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); } @@ -756,3 +915,30 @@ mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) mlx5_esw_chains_close(esw); mlx5_esw_chains_cleanup(esw); } + +int +mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain, + u32 *chain_mapping) +{ + return mapping_add(esw_chains_mapping(esw), &chain, chain_mapping); +} + +int +mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, u32 chain_mapping) +{ + return mapping_remove(esw_chains_mapping(esw), chain_mapping); +} + +int mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, + u32 *chain) +{ + int err; + + err = mapping_find(esw_chains_mapping(esw), tag, chain); + if (err) { + esw_warn(esw->dev, "Can't find chain for tag: %d\n", tag); + return -ENOENT; + } + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h index 2e13097fe348..f8c4239846ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h @@ -4,8 +4,12 @@ #ifndef __ML5_ESW_CHAINS_H__ #define __ML5_ESW_CHAINS_H__ +#include "eswitch.h" + bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw); +bool +mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw); u32 mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw); u32 @@ -23,8 +27,23 @@ mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, struct mlx5_flow_table * mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw); +struct mlx5_flow_table * +mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw); +void +mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft); + +int +mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain, + u32 *chain_mapping); +int +mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, + u32 chain_mapping); + int 
mlx5_esw_chains_create(struct mlx5_eswitch *esw); void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw); -#endif /* __ML5_ESW_CHAINS_H__ */ +int +mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain); +#endif /* __ML5_ESW_CHAINS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index e49acd0c5da5..7f618a443bfd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -39,6 +39,7 @@ #include "lib/eq.h" #include "eswitch.h" #include "fs_core.h" +#include "devlink.h" #include "ecpf.h" enum { @@ -1333,7 +1334,6 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, goto out; } - memset(spec, 0, sizeof(*spec)); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; /* Attach drop flow counter */ @@ -1345,7 +1345,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, dest_num++; } vport->ingress.legacy.drop_rule = - mlx5_add_flow_rules(vport->ingress.acl, spec, + mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, dst, dest_num); if (IS_ERR(vport->ingress.legacy.drop_rule)) { err = PTR_ERR(vport->ingress.legacy.drop_rule); @@ -1408,7 +1408,6 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, struct mlx5_flow_destination drop_ctr_dst = {0}; struct mlx5_flow_destination *dst = NULL; struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_spec *spec; int dest_num = 0; int err = 0; @@ -1437,11 +1436,6 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, if (err) return err; - /* Drop others rule (star rule) */ - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - goto out; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; /* Attach egress drop flow counter */ @@ -1453,7 +1447,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, dest_num++; } vport->egress.legacy.drop_rule = - mlx5_add_flow_rules(vport->egress.acl, spec, + 
mlx5_add_flow_rules(vport->egress.acl, NULL, &flow_act, dst, dest_num); if (IS_ERR(vport->egress.legacy.drop_rule)) { err = PTR_ERR(vport->egress.legacy.drop_rule); @@ -1462,8 +1456,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, vport->vport, err); vport->egress.legacy.drop_rule = NULL; } -out: - kvfree(spec); + return err; } @@ -1669,34 +1662,6 @@ static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) ((u8 *)node_guid)[0] = mac[5]; } -static void esw_apply_vport_conf(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - u16 vport_num = vport->vport; - int flags; - - if (mlx5_esw_is_manager_vport(esw, vport_num)) - return; - - mlx5_modify_vport_admin_state(esw->dev, - MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, 1, - vport->info.link_state); - - /* Host PF has its own mac/guid. */ - if (vport_num) { - mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, - vport->info.mac); - mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, - vport->info.node_guid); - } - - flags = (vport->info.vlan || vport->info.qos) ? 
- SET_VLAN_STRIP | SET_VLAN_INSERT : 0; - modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, - flags); -} - static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1706,8 +1671,7 @@ static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, if (mlx5_esw_is_manager_vport(esw, vport->vport)) return 0; - if (!mlx5_esw_is_manager_vport(esw, vport->vport) && - MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { + if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { vport->ingress.legacy.drop_counter = mlx5_fc_create(esw->dev, false); if (IS_ERR(vport->ingress.legacy.drop_counter)) { esw_warn(esw->dev, @@ -1721,8 +1685,7 @@ static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, if (ret) goto ingress_err; - if (!mlx5_esw_is_manager_vport(esw, vport->vport) && - MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { + if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { vport->egress.legacy.drop_counter = mlx5_fc_create(esw->dev, false); if (IS_ERR(vport->egress.legacy.drop_counter)) { esw_warn(esw->dev, @@ -1783,29 +1746,75 @@ static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, esw_vport_destroy_offloads_acl_tables(esw, vport); } -static int esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, - enum mlx5_eswitch_vport_event enabled_events) +static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + u16 vport_num = vport->vport; + int flags; + int err; + + err = esw_vport_setup_acl(esw, vport); + if (err) + return err; + + /* Attach vport to the eswitch rate limiter */ + esw_vport_enable_qos(esw, vport, vport->info.max_rate, vport->qos.bw_share); + + if (mlx5_esw_is_manager_vport(esw, vport_num)) + return 0; + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + vport_num, 1, + vport->info.link_state); + + /* Host PF has its own mac/guid. 
*/ + if (vport_num) { + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, + vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, + vport->info.node_guid); + } + + flags = (vport->info.vlan || vport->info.qos) ? + SET_VLAN_STRIP | SET_VLAN_INSERT : 0; + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, + vport->info.qos, flags); + + return 0; +} + +/* Don't cleanup vport->info, it's needed to restore vport configuration */ +static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { u16 vport_num = vport->vport; + + if (!mlx5_esw_is_manager_vport(esw, vport_num)) + mlx5_modify_vport_admin_state(esw->dev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + vport_num, 1, + MLX5_VPORT_ADMIN_STATE_DOWN); + + esw_vport_disable_qos(esw, vport); + esw_vport_cleanup_acl(esw, vport); +} + +static int esw_enable_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; int ret; + vport = mlx5_eswitch_get_vport(esw, vport_num); + mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); - /* Restore old vport configuration */ - esw_apply_vport_conf(esw, vport); - - ret = esw_vport_setup_acl(esw, vport); + ret = esw_vport_setup(esw, vport); if (ret) goto done; - /* Attach vport to the eswitch rate limiter */ - if (esw_vport_enable_qos(esw, vport, vport->info.max_rate, - vport->qos.bw_share)) - esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num); - /* Sync with current vport context */ vport->enabled_events = enabled_events; vport->enabled = true; @@ -1826,10 +1835,11 @@ done: return ret; } -static void esw_disable_vport(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static void esw_disable_vport(struct mlx5_eswitch *esw, u16 vport_num) { - u16 vport_num = vport->vport; + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); 
mutex_lock(&esw->state_lock); if (!vport->enabled) @@ -1847,16 +1857,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, */ esw_vport_change_handle_locked(vport); vport->enabled_events = 0; - esw_vport_disable_qos(esw, vport); - - if (!mlx5_esw_is_manager_vport(esw, vport->vport) && - esw->mode == MLX5_ESWITCH_LEGACY) - mlx5_modify_vport_admin_state(esw->dev, - MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, 1, - MLX5_VPORT_ADMIN_STATE_DOWN); - - esw_vport_cleanup_acl(esw, vport); + esw_vport_cleanup(esw, vport); esw->enabled_vports--; done: @@ -1944,6 +1945,59 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) +int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events) +{ + int err; + + err = esw_enable_vport(esw, vport_num, enabled_events); + if (err) + return err; + + err = esw_offloads_load_rep(esw, vport_num); + if (err) + goto err_rep; + + return err; + +err_rep: + esw_disable_vport(esw, vport_num); + return err; +} + +void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + esw_offloads_unload_rep(esw, vport_num); + esw_disable_vport(esw, vport_num); +} + +void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) +{ + int i; + + mlx5_esw_for_each_vf_vport_num_reverse(esw, i, num_vfs) + mlx5_eswitch_unload_vport(esw, i); +} + +int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, + enum mlx5_eswitch_vport_event enabled_events) +{ + int err; + int i; + + mlx5_esw_for_each_vf_vport_num(esw, i, num_vfs) { + err = mlx5_eswitch_load_vport(esw, i, enabled_events); + if (err) + goto vf_err; + } + + return 0; + +vf_err: + mlx5_eswitch_unload_vf_vports(esw, i - 1); + return err; +} + /* mlx5_eswitch_enable_pf_vf_vports() enables vports of PF, ECPF and VFs * whichever are present on the eswitch. 
*/ @@ -1951,46 +2005,33 @@ int mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, enum mlx5_eswitch_vport_event enabled_events) { - struct mlx5_vport *vport; - int num_vfs; int ret; - int i; /* Enable PF vport */ - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - ret = esw_enable_vport(esw, vport, enabled_events); + ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_PF, enabled_events); if (ret) return ret; /* Enable ECPF vport */ if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - ret = esw_enable_vport(esw, vport, enabled_events); + ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; } /* Enable VF vports */ - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) { - ret = esw_enable_vport(esw, vport, enabled_events); - if (ret) - goto vf_err; - } + ret = mlx5_eswitch_load_vf_vports(esw, esw->esw_funcs.num_vfs, + enabled_events); + if (ret) + goto vf_err; return 0; vf_err: - num_vfs = i - 1; - mlx5_esw_for_each_vf_vport_reverse(esw, i, vport, num_vfs) - esw_disable_vport(esw, vport); - - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - esw_disable_vport(esw, vport); - } + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); ecpf_err: - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - esw_disable_vport(esw, vport); + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); return ret; } @@ -1999,19 +2040,81 @@ ecpf_err: */ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { - struct mlx5_vport *vport; - int i; + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); - mlx5_esw_for_all_vports_reverse(esw, i, vport) - esw_disable_vport(esw, vport); + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); + + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); } -int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) +static void 
mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw) { + struct devlink *devlink = priv_to_devlink(esw->dev); + union devlink_param_value val; int err; - if (!ESW_ALLOWED(esw) || - !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + err = devlink_param_driverinit_value_get(devlink, + MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + &val); + if (!err) { + esw->params.large_group_num = val.vu32; + } else { + esw_warn(esw->dev, + "Devlink can't get param fdb_large_groups, uses default (%d).\n", + ESW_OFFLOADS_DEFAULT_NUM_GROUPS); + esw->params.large_group_num = ESW_OFFLOADS_DEFAULT_NUM_GROUPS; + } +} + +static void +mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) +{ + const u32 *out; + + WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE); + + if (num_vfs < 0) + return; + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) { + esw->esw_funcs.num_vfs = num_vfs; + return; + } + + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + return; + + esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + kvfree(out); +} + +/** + * mlx5_eswitch_enable_locked - Enable eswitch + * @esw: Pointer to eswitch + * @mode: Eswitch mode to enable + * @num_vfs: Enable eswitch for given number of VFs. This is optional. + * Valid value are 0, > 0 and MLX5_ESWITCH_IGNORE_NUM_VFS. + * Caller should pass num_vfs > 0 when enabling eswitch for + * vf vports. Caller should pass num_vfs = 0, when eswitch + * is enabled without sriov VFs or when caller + * is unaware of the sriov state of the host PF on ECPF based + * eswitch. Caller should pass < 0 when num_vfs should be + * completely ignored. This is typically the case when eswitch + * is enabled without sriov regardless of PF/ECPF system. + * mlx5_eswitch_enable_locked() Enables eswitch in either legacy or offloads + * mode. If num_vfs >=0 is provided, it setup VF related eswitch vports. + * It returns 0 on success or error code on failure. 
+ */ +int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) +{ + int err; + + lockdep_assert_held(&esw->mode_lock); + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { esw_warn(esw->dev, "FDB is not supported, aborting ...\n"); return -EOPNOTSUPP; } @@ -2022,6 +2125,10 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) esw_warn(esw->dev, "engress ACL is not supported by FW\n"); + mlx5_eswitch_get_devlink_param(esw); + + mlx5_eswitch_update_num_of_vfs(esw, num_vfs); + esw_create_tsar(esw); esw->mode = mode; @@ -2058,11 +2165,34 @@ abort: return err; } -void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) +/** + * mlx5_eswitch_enable - Enable eswitch + * @esw: Pointer to eswitch + * @num_vfs: Enable eswitch swich for given number of VFs. + * Caller must pass num_vfs > 0 when enabling eswitch for + * vf vports. + * mlx5_eswitch_enable() returns 0 on success or error code on failure. 
+ */ +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) +{ + int ret; + + if (!ESW_ALLOWED(esw)) + return 0; + + mutex_lock(&esw->mode_lock); + ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs); + mutex_unlock(&esw->mode_lock); + return ret; +} + +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) { int old_mode; - if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE) + lockdep_assert_held_write(&esw->mode_lock); + + if (esw->mode == MLX5_ESWITCH_NONE) return; esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", @@ -2091,6 +2221,16 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) mlx5_eswitch_clear_vf_vports_info(esw); } +void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) +{ + if (!ESW_ALLOWED(esw)) + return; + + mutex_lock(&esw->mode_lock); + mlx5_eswitch_disable_locked(esw, clear_vf); + mutex_unlock(&esw->mode_lock); +} + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw; @@ -2142,6 +2282,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.mod_hdr.hlist); atomic64_set(&esw->offloads.num_flows, 0); mutex_init(&esw->state_lock); + mutex_init(&esw->mode_lock); mlx5_esw_for_all_vports(esw, i, vport) { vport->vport = mlx5_eswitch_index_to_vport_num(esw, i); @@ -2176,6 +2317,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); esw_offloads_cleanup_reps(esw); + mutex_destroy(&esw->mode_lock); + mutex_destroy(&esw->state_lock); mutex_destroy(&esw->offloads.mod_hdr.lock); mutex_destroy(&esw->offloads.encap_tbl_lock); kfree(esw->vports); @@ -2410,12 +2553,11 @@ static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, } /* Star rule to forward all traffic to uplink vport */ - memset(spec, 0, sizeof(*spec)); memset(&dest, 0, sizeof(dest)); dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport.num = MLX5_VPORT_UPLINK; 
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec, + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, NULL, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@ -2600,9 +2742,13 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, u64 bytes = 0; int err = 0; - if (!vport->enabled || esw->mode != MLX5_ESWITCH_LEGACY) + if (esw->mode != MLX5_ESWITCH_LEGACY) return 0; + mutex_lock(&esw->state_lock); + if (!vport->enabled) + goto unlock; + if (vport->egress.legacy.drop_counter) mlx5_fc_query(dev, vport->egress.legacy.drop_counter, &stats->rx_dropped, &bytes); @@ -2613,20 +2759,22 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) && !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) - return 0; + goto unlock; err = mlx5_query_vport_down_stats(dev, vport->vport, 1, &rx_discard_vport_down, &tx_discard_vport_down); if (err) - return err; + goto unlock; if (MLX5_CAP_GEN(dev, receive_discard_vport_down)) stats->rx_dropped += rx_discard_vport_down; if (MLX5_CAP_GEN(dev, transmit_discard_vport_down)) stats->tx_dropped += tx_discard_vport_down; - return 0; +unlock: + mutex_unlock(&esw->state_lock); + return err; } int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, @@ -2742,22 +2890,4 @@ bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS); } -void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) -{ - const u32 *out; - - WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE); - - if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) { - esw->esw_funcs.num_vfs = num_vfs; - return; - } - - out = mlx5_esw_query_functions(esw->dev); - if (IS_ERR(out)) - return; - esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, - host_params_context.host_num_of_vfs); - kvfree(out); -} diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 4472710ccc9c..39f42f985fbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -42,6 +42,7 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include "lib/mpfs.h" +#include "en/tc_ct.h" #define FDB_TC_MAX_CHAIN 3 #define FDB_FT_CHAIN (FDB_TC_MAX_CHAIN + 1) @@ -55,6 +56,8 @@ #ifdef CONFIG_MLX5_ESWITCH +#define ESW_OFFLOADS_DEFAULT_NUM_GROUPS 15 + #define MLX5_MAX_UC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) @@ -183,12 +186,22 @@ struct mlx5_eswitch_fdb { int vlan_push_pop_refcount; struct mlx5_esw_chains_priv *esw_chains_priv; + struct { + DECLARE_HASHTABLE(table, 8); + /* Protects vports.table */ + struct mutex lock; + } vports; + } offloads; }; u32 flags; }; struct mlx5_esw_offload { + struct mlx5_flow_table *ft_offloads_restore; + struct mlx5_flow_group *restore_group; + struct mlx5_modify_hdr *restore_copy_hdr_id; + struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; struct mlx5_eswitch_rep *vport_reps; @@ -224,6 +237,7 @@ struct mlx5_esw_functions { enum { MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), + MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED = BIT(1), }; struct mlx5_eswitch { @@ -244,6 +258,11 @@ struct mlx5_eswitch { */ struct mutex state_lock; + /* Protects eswitch mode change that occurs via one or more + * user commands, i.e. sriov state change, devlink commands. 
+ */ + struct mutex mode_lock; + struct { bool enabled; u32 root_tsar_id; @@ -255,6 +274,9 @@ struct mlx5_eswitch { u16 manager_vport; u16 first_host_vport; struct mlx5_esw_functions esw_funcs; + struct { + u32 large_group_num; + } params; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -279,7 +301,11 @@ int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode); + +#define MLX5_ESWITCH_IGNORE_NUM_VFS (-1) +int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs); +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs); +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf); void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u16 vport, u8 mac[ETH_ALEN]); @@ -315,6 +341,7 @@ struct mlx5_termtbl_handle; bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr, struct mlx5_flow_act *flow_act, struct mlx5_flow_spec *spec); @@ -375,6 +402,8 @@ enum { enum { MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0), MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), + MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2), + MLX5_ESW_ATTR_FLAG_HAIRPIN = BIT(3), }; struct mlx5_esw_flow_attr { @@ -405,6 +434,9 @@ struct mlx5_esw_flow_attr { u16 prio; u32 dest_chain; u32 flags; + struct mlx5_flow_table *fdb; + struct mlx5_flow_table *dest_ft; + struct mlx5_ct_attr ct_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; }; @@ -414,7 +446,6 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); -int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 
*mode); int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack); @@ -433,6 +464,11 @@ int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw, struct mlx5_vport *vport, u16 vlan_id, u32 flow_action); +static inline bool mlx5_esw_qos_enabled(struct mlx5_eswitch *esw) +{ + return esw->qos.enabled; +} + static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, u8 vlan_depth) { @@ -608,7 +644,6 @@ mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num); -void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs); int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); int @@ -623,11 +658,30 @@ void esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw); +void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw); + +struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag); +u32 +esw_get_max_restore_tag(struct mlx5_eswitch *esw); + +int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num); +void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} -static inline int mlx5_eswitch_enable(struct 
mlx5_eswitch *esw, int mode) { return 0; } +static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; } static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } @@ -636,8 +690,11 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) return ERR_PTR(-EOPNOTSUPP); } -static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {} - +static inline struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1a57b2bd74b8..f171eb2234b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -37,7 +37,7 @@ #include <linux/mlx5/fs.h> #include "mlx5_core.h" #include "eswitch.h" -#include "eswitch_offloads_chains.h" +#include "esw/chains.h" #include "rdma.h" #include "en.h" #include "fs_core.h" @@ -50,6 +50,181 @@ #define MLX5_ESW_MISS_FLOWS (2) #define UPLINK_REP_INDEX 0 +/* Per vport tables */ + +#define MLX5_ESW_VPORT_TABLE_SIZE 128 + +/* This struct is used as a key to the hash table and we need it to be packed + * so hash result is consistent + */ +struct mlx5_vport_key { + u32 chain; + u16 prio; + u16 vport; + u16 vhca_id; +} __packed; + +struct mlx5_vport_table { + struct hlist_node hlist; + struct mlx5_flow_table *fdb; + u32 num_rules; + struct mlx5_vport_key key; +}; + +#define MLX5_ESW_VPORT_TBL_NUM_GROUPS 4 + +static struct mlx5_flow_table * +esw_vport_tbl_create(struct mlx5_eswitch *esw, struct 
mlx5_flow_namespace *ns) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *fdb; + + ft_attr.autogroup.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS; + ft_attr.max_fte = MLX5_ESW_VPORT_TABLE_SIZE; + ft_attr.prio = FDB_PER_VPORT; + fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(fdb)) { + esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n", + PTR_ERR(fdb)); + } + + return fdb; +} + +static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr, + struct mlx5_vport_key *key) +{ + key->vport = attr->in_rep->vport; + key->chain = attr->chain; + key->prio = attr->prio; + key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + return jhash(key, sizeof(*key), 0); +} + +/* caller must hold vports.lock */ +static struct mlx5_vport_table * +esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 key) +{ + struct mlx5_vport_table *e; + + hash_for_each_possible(esw->fdb_table.offloads.vports.table, e, hlist, key) + if (!memcmp(&e->key, skey, sizeof(*skey))) + return e; + + return NULL; +} + +static void +esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_vport_table *e; + struct mlx5_vport_key key; + u32 hkey; + + mutex_lock(&esw->fdb_table.offloads.vports.lock); + hkey = flow_attr_to_vport_key(esw, attr, &key); + e = esw_vport_tbl_lookup(esw, &key, hkey); + if (!e || --e->num_rules) + goto out; + + hash_del(&e->hlist); + mlx5_destroy_flow_table(e->fdb); + kfree(e); +out: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); +} + +static struct mlx5_flow_table * +esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *fdb; + struct mlx5_vport_table *e; + struct mlx5_vport_key skey; + u32 hkey; + + mutex_lock(&esw->fdb_table.offloads.vports.lock); + hkey = flow_attr_to_vport_key(esw, attr, &skey); + e = 
esw_vport_tbl_lookup(esw, &skey, hkey); + if (e) { + e->num_rules++; + goto out; + } + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) { + fdb = ERR_PTR(-ENOMEM); + goto err_alloc; + } + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!ns) { + esw_warn(dev, "Failed to get FDB namespace\n"); + fdb = ERR_PTR(-ENOENT); + goto err_ns; + } + + fdb = esw_vport_tbl_create(esw, ns); + if (IS_ERR(fdb)) + goto err_ns; + + e->fdb = fdb; + e->num_rules = 1; + e->key = skey; + hash_add(esw->fdb_table.offloads.vports.table, &e->hlist, hkey); +out: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); + return e->fdb; + +err_ns: + kfree(e); +err_alloc: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); + return fdb; +} + +int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_flow_attr attr = {}; + struct mlx5_eswitch_rep rep = {}; + struct mlx5_flow_table *fdb; + struct mlx5_vport *vport; + int i; + + attr.prio = 1; + attr.in_rep = &rep; + mlx5_esw_for_all_vports(esw, i, vport) { + attr.in_rep->vport = vport->vport; + fdb = esw_vport_tbl_get(esw, &attr); + if (IS_ERR(fdb)) + goto out; + } + return 0; + +out: + mlx5_esw_vport_tbl_put(esw); + return PTR_ERR(fdb); +} + +void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_flow_attr attr = {}; + struct mlx5_eswitch_rep rep = {}; + struct mlx5_vport *vport; + int i; + + attr.prio = 1; + attr.in_rep = &rep; + mlx5_esw_for_all_vports(esw, i, vport) { + attr.in_rep->vport = vport->vport; + esw_vport_tbl_put(esw, &attr); + } +} + +/* End: Per vport tables */ + static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, u16 vport_num) { @@ -85,7 +260,8 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, attr->in_rep->vport)); misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, + 
mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); @@ -124,6 +300,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, bool split = !!(attr->split_count); struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; + bool hairpin = false; int j, i = 0; if (esw->mode != MLX5_ESWITCH_OFFLOADS) @@ -148,7 +325,12 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { struct mlx5_flow_table *ft; - if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { + if (attr->dest_ft) { + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = attr->dest_ft; + i++; + } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[i].ft = mlx5_esw_chains_get_tc_end_ft(esw); @@ -191,8 +373,6 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, i++; } - mlx5_eswitch_set_rule_source_port(esw, spec, attr); - if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; if (attr->inner_match_level != MLX5_MATCH_NONE) @@ -201,27 +381,45 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_hdr = attr->modify_hdr; - fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, - !!split); + if (split) { + fdb = esw_vport_tbl_get(esw, attr); + } else { + if (attr->chain || attr->prio) + fdb = mlx5_esw_chains_get_table(esw, attr->chain, + attr->prio, 0); + else + fdb = attr->fdb; + + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT)) + mlx5_eswitch_set_rule_source_port(esw, spec, attr); + } if (IS_ERR(fdb)) { rule = ERR_CAST(fdb); goto err_esw_get; } - if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec)) + if 
(mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) { rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr, &flow_act, dest, i); - else + hairpin = true; + } else { rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); + } if (IS_ERR(rule)) goto err_add_rule; else atomic64_inc(&esw->offloads.num_flows); + if (hairpin) + attr->flags |= MLX5_ESW_ATTR_FLAG_HAIRPIN; + return rule; err_add_rule: - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, !!split); + if (split) + esw_vport_tbl_put(esw, attr); + else if (attr->chain || attr->prio) + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); err_esw_get: if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain) mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0); @@ -247,7 +445,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, goto err_get_fast; } - fwd_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 1); + fwd_fdb = esw_vport_tbl_get(esw, attr); if (IS_ERR(fwd_fdb)) { rule = ERR_CAST(fwd_fdb); goto err_get_fwd; @@ -285,7 +483,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, return rule; add_err: - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 1); + esw_vport_tbl_put(esw, attr); err_get_fwd: mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); err_get_fast: @@ -303,20 +501,25 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, mlx5_del_flow_rules(rule); - /* unref the term table */ - for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { - if (attr->dests[i].termtbl) - mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + if (attr->flags & MLX5_ESW_ATTR_FLAG_HAIRPIN) { + /* unref the term table */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (attr->dests[i].termtbl) + mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + } } atomic64_dec(&esw->offloads.num_flows); if (fwd_rule) { - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 1); + esw_vport_tbl_put(esw, attr); mlx5_esw_chains_put_table(esw, attr->chain, 
attr->prio, 0); } else { - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, - !!split); + if (split) + esw_vport_tbl_put(esw, attr); + else if (attr->chain || attr->prio) + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, + 0); if (attr->dest_chain) mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0); } @@ -578,14 +781,21 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } +static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw) +{ + return MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & + MLX5_FDB_TO_VPORT_REG_C_1; +} + static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) { u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; - u8 fdb_to_vport_reg_c_id; + u8 curr, wanted; int err; - if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + if (!mlx5_eswitch_reg_c1_loopback_supported(esw) && + !mlx5_eswitch_vport_match_metadata_enabled(esw)) return 0; err = mlx5_eswitch_query_esw_vport_context(esw->dev, 0, false, @@ -593,22 +803,33 @@ static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) if (err) return err; - fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.fdb_to_vport_reg_c_id); + curr = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + wanted = MLX5_FDB_TO_VPORT_REG_C_0; + if (mlx5_eswitch_reg_c1_loopback_supported(esw)) + wanted |= MLX5_FDB_TO_VPORT_REG_C_1; if (enable) - fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; + curr |= wanted; else - fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; + curr &= ~wanted; MLX5_SET(modify_esw_vport_context_in, in, - esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + esw_vport_context.fdb_to_vport_reg_c_id, curr); MLX5_SET(modify_esw_vport_context_in, in, field_select.fdb_to_vport_reg_c_id, 1); - return 
mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, - in, sizeof(in)); + err = mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, in, + sizeof(in)); + if (!err) { + if (enable && (curr & MLX5_FDB_TO_VPORT_REG_C_1)) + esw->flags |= MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED; + else + esw->flags &= ~MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED; + } + + return err; } static void peer_miss_rules_setup(struct mlx5_eswitch *esw, @@ -621,7 +842,8 @@ static void peer_miss_rules_setup(struct mlx5_eswitch *esw, if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; } else { @@ -836,6 +1058,59 @@ out: return err; } +struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) +{ + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_flow_table *ft = esw->offloads.ft_offloads_restore; + struct mlx5_flow_context *flow_context; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_destination dest; + struct mlx5_flow_spec *spec; + void *misc; + + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return ERR_PTR(-EOPNOTSUPP); + + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + ESW_CHAIN_TAG_METADATA_MASK); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_hdr = esw->offloads.restore_copy_hdr_id; + + 
flow_context = &spec->flow_context; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = tag; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = esw->offloads.ft_offloads; + + flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kfree(spec); + + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, + "Failed to create restore rule for tag: %d, err(%d)\n", + tag, (int)PTR_ERR(flow_rule)); + + return flow_rule; +} + +u32 +esw_get_max_restore_tag(struct mlx5_eswitch *esw) +{ + return ESW_CHAIN_TAG_METADATA_MASK; +} + #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 @@ -851,8 +1126,9 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_0); + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); } else { MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -1057,6 +1333,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports) } ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS; + ft_attr.prio = 1; ft_offloads = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft_offloads)) { @@ -1134,7 +1411,8 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, mlx5_eswitch_get_vport_metadata_for_match(esw, vport)); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; } else { @@ -1160,6 +1438,148 @@ out: return flow_rule; } + +static int mlx5_eswitch_inline_mode_get(const struct mlx5_eswitch *esw, u8 *mode) +{ + u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; + struct mlx5_core_dev *dev = esw->dev; + int vport; + 
+ if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (esw->mode == MLX5_ESWITCH_NONE) + return -EOPNOTSUPP; + + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + mlx5_mode = MLX5_INLINE_MODE_NONE; + goto out; + case MLX5_CAP_INLINE_MODE_L2: + mlx5_mode = MLX5_INLINE_MODE_L2; + goto out; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + goto query_vports; + } + +query_vports: + mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode); + mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) { + mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode); + if (prev_mlx5_mode != mlx5_mode) + return -EINVAL; + prev_mlx5_mode = mlx5_mode; + } + +out: + *mode = mlx5_mode; + return 0; +} + +static void esw_destroy_restore_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return; + + mlx5_modify_header_dealloc(esw->dev, offloads->restore_copy_hdr_id); + mlx5_destroy_flow_group(offloads->restore_group); + mlx5_destroy_flow_table(offloads->ft_offloads_restore); +} + +static int esw_create_restore_table(struct mlx5_eswitch *esw) +{ + u8 modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *ns; + struct mlx5_modify_hdr *mod_hdr; + void *match_criteria, *misc; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + u32 *flow_group_in; + int err = 0; + + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -EOPNOTSUPP; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto out_free; + } + + ft_attr.max_fte = 1 
<< ESW_CHAIN_TAG_METADATA_BITS; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + esw_warn(esw->dev, "Failed to create restore table, err %d\n", + err); + goto out_free; + } + + memset(flow_group_in, 0, inlen); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + misc = MLX5_ADDR_OF(fte_match_param, match_criteria, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + ESW_CHAIN_TAG_METADATA_MASK); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft_attr.max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create restore flow group, err: %d\n", + err); + goto err_group; + } + + MLX5_SET(copy_action_in, modact, action_type, MLX5_ACTION_TYPE_COPY); + MLX5_SET(copy_action_in, modact, src_field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_1); + MLX5_SET(copy_action_in, modact, dst_field, + MLX5_ACTION_IN_FIELD_METADATA_REG_B); + mod_hdr = mlx5_modify_header_alloc(esw->dev, + MLX5_FLOW_NAMESPACE_KERNEL, 1, + modact); + if (IS_ERR(mod_hdr)) { + esw_warn(dev, "Failed to create restore mod header, err: %d\n", + err); + err = PTR_ERR(mod_hdr); + goto err_mod_hdr; + } + + esw->offloads.ft_offloads_restore = ft; + esw->offloads.restore_group = g; + esw->offloads.restore_copy_hdr_id = mod_hdr; + + kvfree(flow_group_in); + + return 0; + +err_mod_hdr: + mlx5_destroy_flow_group(g); +err_group: + mlx5_destroy_flow_table(ft); +out_free: + kvfree(flow_group_in); + + return err; +} + static int esw_offloads_start(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { @@ -1172,13 +1592,14 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return -EINVAL; } - mlx5_eswitch_disable(esw, false); - 
mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); - err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); + mlx5_eswitch_disable_locked(esw, false); + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS, + esw->dev->priv.sriov.num_vfs); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to offloads"); - err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); + err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to legacy"); @@ -1233,187 +1654,66 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, esw->offloads.rep_ops[rep_type]->unload(rep); } -static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ - struct mlx5_eswitch_rep *rep; - - if (mlx5_ecpf_vport_exists(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); - __esw_offloads_unload_rep(esw, rep, rep_type); - } - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); - __esw_offloads_unload_rep(esw, rep, rep_type); - } - - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - __esw_offloads_unload_rep(esw, rep, rep_type); -} - -static void __unload_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; int i; - mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvports) + mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, esw->esw_funcs.num_vfs) __esw_offloads_unload_rep(esw, rep, rep_type); -} - -static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports) -{ - u8 rep_type = NUM_REP_TYPES; - - while (rep_type-- > 0) - __unload_reps_vf_vport(esw, nvports, rep_type); -} - -static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ - __unload_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type); - - /* Special vports must be the 
last to unload. */ - __unload_reps_special_vport(esw, rep_type); -} - -static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw) -{ - u8 rep_type = NUM_REP_TYPES; - - while (rep_type-- > 0) - __unload_reps_all_vport(esw, rep_type); -} - -static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep, u8 rep_type) -{ - int err = 0; - - if (atomic_cmpxchg(&rep->rep_data[rep_type].state, - REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { - err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); - if (err) - atomic_set(&rep->rep_data[rep_type].state, - REP_REGISTERED); - } - - return err; -} - -static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ - struct mlx5_eswitch_rep *rep; - int err; - - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - err = __esw_offloads_load_rep(esw, rep, rep_type); - if (err) - return err; - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); - err = __esw_offloads_load_rep(esw, rep, rep_type); - if (err) - goto err_pf; - } if (mlx5_ecpf_vport_exists(esw->dev)) { rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); - err = __esw_offloads_load_rep(esw, rep, rep_type); - if (err) - goto err_ecpf; + __esw_offloads_unload_rep(esw, rep, rep_type); } - return 0; - -err_ecpf: if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); __esw_offloads_unload_rep(esw, rep, rep_type); } -err_pf: rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); __esw_offloads_unload_rep(esw, rep, rep_type); - return err; } -static int __load_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_eswitch_rep *rep; - int err, i; - - mlx5_esw_for_each_vf_rep(esw, i, rep, nvports) { - err = __esw_offloads_load_rep(esw, rep, rep_type); - if (err) - goto err_vf; - } - - return 0; - -err_vf: - __unload_reps_vf_vport(esw, --i, 
rep_type); - return err; -} - -static int __load_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ + int rep_type; int err; - /* Special vports must be loaded first, uplink rep creates mdev resource. */ - err = __load_reps_special_vport(esw, rep_type); - if (err) - return err; + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; - err = __load_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type); - if (err) - goto err_vfs; + rep = mlx5_eswitch_get_rep(esw, vport_num); + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, + REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { + err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); + if (err) + goto err_reps; + } return 0; -err_vfs: - __unload_reps_special_vport(esw, rep_type); - return err; -} - -static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports) -{ - u8 rep_type = 0; - int err; - - for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = __load_reps_vf_vport(esw, nvports, rep_type); - if (err) - goto err_reps; - } - - return err; - err_reps: - while (rep_type-- > 0) - __unload_reps_vf_vport(esw, nvports, rep_type); + atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED); + for (--rep_type; rep_type >= 0; rep_type--) + __esw_offloads_unload_rep(esw, rep, rep_type); return err; } -static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw) +void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) { - u8 rep_type = 0; - int err; - - for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = __load_reps_all_vport(esw, rep_type); - if (err) - goto err_reps; - } + struct mlx5_eswitch_rep *rep; + int rep_type; - return err; + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return; -err_reps: - while (rep_type-- > 0) - __unload_reps_all_vport(esw, rep_type); - return err; + rep = mlx5_eswitch_get_rep(esw, vport_num); + for (rep_type = NUM_REP_TYPES - 1; rep_type >= 0; rep_type--) + 
__esw_offloads_unload_rep(esw, rep, rep_type); } #define ESW_OFFLOADS_DEVCOM_PAIR (0) @@ -1601,14 +1901,21 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; - static const struct mlx5_flow_spec spec = {}; struct mlx5_flow_act flow_act = {}; int err = 0; + u32 key; + + key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport); + key >>= ESW_SOURCE_PORT_METADATA_OFFSET; MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); - MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); - MLX5_SET(set_action_in, action, data, - mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); + MLX5_SET(set_action_in, action, field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, key); + MLX5_SET(set_action_in, action, offset, + ESW_SOURCE_PORT_METADATA_OFFSET); + MLX5_SET(set_action_in, action, length, + ESW_SOURCE_PORT_METADATA_BITS); vport->ingress.offloads.modify_metadata = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, @@ -1625,7 +1932,7 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; vport->ingress.offloads.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, - &spec, &flow_act, NULL, 0); + NULL, &flow_act, NULL, 0); if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) { err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule); esw_warn(esw->dev, @@ -1837,6 +2144,18 @@ esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) return true; } +static bool +esw_check_vport_match_metadata_mandatory(const struct mlx5_eswitch *esw) +{ + return mlx5_core_mp_enabled(esw->dev); +} + +static bool esw_use_vport_metadata(const struct mlx5_eswitch *esw) +{ + return esw_check_vport_match_metadata_mandatory(esw) && + 
esw_check_vport_match_metadata_supported(esw); +} + int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) @@ -1875,7 +2194,7 @@ static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) struct mlx5_vport *vport; int err; - if (esw_check_vport_match_metadata_supported(esw)) + if (esw_use_vport_metadata(esw)) esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); @@ -1911,27 +2230,34 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) if (err) return err; - err = esw_create_offloads_fdb_tables(esw, total_vports); + err = esw_create_offloads_table(esw, total_vports); if (err) - goto create_fdb_err; + goto create_offloads_err; - err = esw_create_offloads_table(esw, total_vports); + err = esw_create_restore_table(esw); if (err) - goto create_ft_err; + goto create_restore_err; + + err = esw_create_offloads_fdb_tables(esw, total_vports); + if (err) + goto create_fdb_err; err = esw_create_vport_rx_group(esw, total_vports); if (err) goto create_fg_err; + mutex_init(&esw->fdb_table.offloads.vports.lock); + hash_init(esw->fdb_table.offloads.vports.table); + return 0; create_fg_err: - esw_destroy_offloads_table(esw); - -create_ft_err: esw_destroy_offloads_fdb_tables(esw); - create_fdb_err: + esw_destroy_restore_table(esw); +create_restore_err: + esw_destroy_offloads_table(esw); +create_offloads_err: esw_destroy_uplink_offloads_acl_tables(esw); return err; @@ -1939,9 +2265,11 @@ create_fdb_err: static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) { + mutex_destroy(&esw->fdb_table.offloads.vports.lock); esw_destroy_vport_rx_group(esw); - esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_tables(esw); + esw_destroy_restore_table(esw); + esw_destroy_offloads_table(esw); esw_destroy_uplink_offloads_acl_tables(esw); } @@ -1961,11 +2289,12 @@ esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) /* Number of VFs can only 
change from "0 to x" or "x to 0". */ if (esw->esw_funcs.num_vfs > 0) { - esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs); + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); } else { int err; - err = esw_offloads_load_vf_reps(esw, new_num_vfs); + err = mlx5_eswitch_load_vf_vports(esw, new_num_vfs, + MLX5_VPORT_UC_ADDR_CHANGE); if (err) return; } @@ -2023,40 +2352,43 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) else esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; + mutex_init(&esw->offloads.termtbl_mutex); mlx5_rdma_enable_roce(esw->dev); - err = esw_offloads_steering_init(esw); - if (err) - goto err_steering_init; err = esw_set_passing_vport_metadata(esw, true); if (err) goto err_vport_metadata; + err = esw_offloads_steering_init(esw); + if (err) + goto err_steering_init; + /* Representor will control the vport link state */ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; - err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); + /* Uplink vport rep must load first. 
*/ + err = esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK); if (err) - goto err_vports; + goto err_uplink; - err = esw_offloads_load_all_reps(esw); + err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); if (err) - goto err_reps; + goto err_vports; esw_offloads_devcom_init(esw); - mutex_init(&esw->offloads.termtbl_mutex); return 0; -err_reps: - mlx5_eswitch_disable_pf_vf_vports(esw); err_vports: + esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); +err_uplink: esw_set_passing_vport_metadata(esw, false); -err_vport_metadata: - esw_offloads_steering_cleanup(esw); err_steering_init: + esw_offloads_steering_cleanup(esw); +err_vport_metadata: mlx5_rdma_disable_roce(esw->dev); + mutex_destroy(&esw->offloads.termtbl_mutex); return err; } @@ -2065,11 +2397,13 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, { int err, err1; - mlx5_eswitch_disable(esw, false); - err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); + mlx5_eswitch_disable_locked(esw, false); + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); - err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); + err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to offloads"); @@ -2082,11 +2416,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_disable(struct mlx5_eswitch *esw) { esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_all_reps(esw); mlx5_eswitch_disable_pf_vf_vports(esw); + esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); mlx5_rdma_disable_roce(esw->dev); + mutex_destroy(&esw->offloads.termtbl_mutex); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; } @@ -2166,60 +2501,82 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) return 0; 
} -static int mlx5_devlink_eswitch_check(struct devlink *devlink) +static int mlx5_eswitch_check(const struct mlx5_core_dev *dev) { - struct mlx5_core_dev *dev = devlink_priv(devlink); - if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return -EOPNOTSUPP; if(!MLX5_ESWITCH_MANAGER(dev)) return -EPERM; - if (dev->priv.eswitch->mode == MLX5_ESWITCH_NONE && - !mlx5_core_is_ecpf_esw_manager(dev)) - return -EOPNOTSUPP; - return 0; } +static int eswitch_devlink_esw_mode_check(const struct mlx5_eswitch *esw) +{ + /* devlink commands in NONE eswitch mode are currently supported only + * on ECPF. + */ + return (esw->mode == MLX5_ESWITCH_NONE && + !mlx5_core_is_ecpf_esw_manager(esw->dev)) ? -EOPNOTSUPP : 0; +} + int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; u16 cur_mlx5_mode, mlx5_mode = 0; int err; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; - cur_mlx5_mode = dev->priv.eswitch->mode; - if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; + mutex_lock(&esw->mode_lock); + err = eswitch_devlink_esw_mode_check(esw); + if (err) + goto unlock; + + cur_mlx5_mode = esw->mode; + if (cur_mlx5_mode == mlx5_mode) - return 0; + goto unlock; if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) - return esw_offloads_start(dev->priv.eswitch, extack); + err = esw_offloads_start(esw, extack); else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) - return esw_offloads_stop(dev->priv.eswitch, extack); + err = esw_offloads_stop(esw, extack); else - return -EINVAL; + err = -EINVAL; + +unlock: + mutex_unlock(&esw->mode_lock); + return err; } int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; int err; - err = mlx5_devlink_eswitch_check(devlink); + err = 
mlx5_eswitch_check(dev); if (err) return err; - return esw_mode_to_devlink(dev->priv.eswitch->mode, mode); + mutex_lock(&esw->mode_lock); + err = eswitch_devlink_esw_mode_check(dev->priv.eswitch); + if (err) + goto unlock; + + err = esw_mode_to_devlink(esw->mode, mode); +unlock: + mutex_unlock(&esw->mode_lock); + return err; } int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, @@ -2230,18 +2587,24 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, int err, vport, num_vport; u8 mlx5_mode; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; + mutex_lock(&esw->mode_lock); + err = eswitch_devlink_esw_mode_check(esw); + if (err) + goto out; + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) - return 0; + goto out; /* fall through */ case MLX5_CAP_INLINE_MODE_L2: NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out; case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: break; } @@ -2249,7 +2612,8 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set inline mode when flows are configured"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out; } err = esw_inline_mode_from_devlink(mode, &mlx5_mode); @@ -2266,6 +2630,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, } esw->offloads.inline_mode = mlx5_mode; + mutex_unlock(&esw->mode_lock); return 0; revert_inline_mode: @@ -2275,6 +2640,7 @@ revert_inline_mode: vport, esw->offloads.inline_mode); out: + mutex_unlock(&esw->mode_lock); return err; } @@ -2284,48 +2650,19 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) struct mlx5_eswitch *esw = dev->priv.eswitch; int err; - err = mlx5_devlink_eswitch_check(devlink); + err = 
mlx5_eswitch_check(dev); if (err) return err; - return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); -} - -int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode) -{ - u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; - struct mlx5_core_dev *dev = esw->dev; - int vport; - - if (!MLX5_CAP_GEN(dev, vport_group_manager)) - return -EOPNOTSUPP; - - if (esw->mode == MLX5_ESWITCH_NONE) - return -EOPNOTSUPP; - - switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { - case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: - mlx5_mode = MLX5_INLINE_MODE_NONE; - goto out; - case MLX5_CAP_INLINE_MODE_L2: - mlx5_mode = MLX5_INLINE_MODE_L2; - goto out; - case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: - goto query_vports; - } - -query_vports: - mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode); - mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) { - mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode); - if (prev_mlx5_mode != mlx5_mode) - return -EINVAL; - prev_mlx5_mode = mlx5_mode; - } + mutex_lock(&esw->mode_lock); + err = eswitch_devlink_esw_mode_check(esw); + if (err) + goto unlock; -out: - *mode = mlx5_mode; - return 0; + err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); +unlock: + mutex_unlock(&esw->mode_lock); + return err; } int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, @@ -2336,30 +2673,40 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, struct mlx5_eswitch *esw = dev->priv.eswitch; int err; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; + mutex_lock(&esw->mode_lock); + err = eswitch_devlink_esw_mode_check(esw); + if (err) + goto unlock; + if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) || - !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) - return -EOPNOTSUPP; + !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) { + err = -EOPNOTSUPP; + goto unlock; + } - if (encap && encap != 
DEVLINK_ESWITCH_ENCAP_MODE_BASIC) - return -EOPNOTSUPP; + if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) { + err = -EOPNOTSUPP; + goto unlock; + } if (esw->mode == MLX5_ESWITCH_LEGACY) { esw->offloads.encap = encap; - return 0; + goto unlock; } if (esw->offloads.encap == encap) - return 0; + goto unlock; if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set encapsulation when flows are configured"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto unlock; } esw_destroy_offloads_fdb_tables(esw); @@ -2375,6 +2722,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, (void)esw_create_offloads_fdb_tables(esw, esw->nvports); } +unlock: + mutex_unlock(&esw->mode_lock); return err; } @@ -2385,14 +2734,36 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, struct mlx5_eswitch *esw = dev->priv.eswitch; int err; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; + mutex_lock(&esw->mode_lock); + err = eswitch_devlink_esw_mode_check(esw); + if (err) + goto unlock; + *encap = esw->offloads.encap; +unlock: + mutex_unlock(&esw->mode_lock); return 0; } +static bool +mlx5_eswitch_vport_has_rep(const struct mlx5_eswitch *esw, u16 vport_num) +{ + /* Currently, only ECPF based device has representor for host PF. 
*/ + if (vport_num == MLX5_VPORT_PF && + !mlx5_core_is_ecpf_esw_manager(esw->dev)) + return false; + + if (vport_num == MLX5_VPORT_ECPF && + !mlx5_ecpf_vport_exists(esw->dev)) + return false; + + return true; +} + void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, const struct mlx5_eswitch_rep_ops *ops, u8 rep_type) @@ -2403,8 +2774,10 @@ void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, esw->offloads.rep_ops[rep_type] = ops; mlx5_esw_for_all_reps(esw, i, rep) { - rep_data = &rep->rep_data[rep_type]; - atomic_set(&rep_data->state, REP_REGISTERED); + if (likely(mlx5_eswitch_vport_has_rep(esw, i))) { + rep_data = &rep->rep_data[rep_type]; + atomic_set(&rep_data->state, REP_REGISTERED); + } } } EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); @@ -2464,15 +2837,53 @@ bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num) vport_num <= esw->dev->priv.sriov.max_vfs; } +bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw) +{ + return !!(esw->flags & MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED); +} +EXPORT_SYMBOL(mlx5_eswitch_reg_c1_loopback_enabled); + bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) { return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA); } EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); -u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num) { - return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num; + u32 vport_num_mask = GENMASK(ESW_VPORT_BITS - 1, 0); + u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0); + u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + u32 val; + + /* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */ + WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS)); + + /* Trim vhca_id to ESW_VHCA_ID_BITS */ + vhca_id &= vhca_id_mask; + + /* Make sure pf and ecpf map to end of ESW_VPORT_BITS range so they + * don't 
overlap with VF numbers, and themselves, after trimming. + */ + WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) < + vport_num_mask - 1); + WARN_ON_ONCE((MLX5_VPORT_ECPF & vport_num_mask) < + vport_num_mask - 1); + WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) == + (MLX5_VPORT_ECPF & vport_num_mask)); + + /* Make sure that the VF vport_num fits ESW_VPORT_BITS and don't + * overlap with pf and ecpf. + */ + if (vport_num != MLX5_VPORT_UPLINK && + vport_num != MLX5_VPORT_ECPF) + WARN_ON_ONCE(vport_num >= vport_num_mask - 1); + + /* We can now trim vport_num to ESW_VPORT_BITS */ + vport_num &= vport_num_mask; + + val = (vhca_id << ESW_VPORT_BITS) | vport_num; + return val << (32 - ESW_SOURCE_PORT_METADATA_BITS); } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index dc08ed9339ab..17a0d2bc102b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -3,6 +3,7 @@ #include <linux/mlx5/fs.h> #include "eswitch.h" +#include "fs_core.h" struct mlx5_termtbl_handle { struct hlist_node termtbl_hlist; @@ -28,6 +29,10 @@ mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, sizeof(dest->vport.num), hash); hash = jhash((const void *)&dest->vport.vhca_id, sizeof(dest->vport.num), hash); + if (dest->vport.pkt_reformat) + hash = jhash(dest->vport.pkt_reformat, + sizeof(*dest->vport.pkt_reformat), + hash); return hash; } @@ -37,11 +42,19 @@ mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, struct mlx5_flow_act *flow_act2, struct mlx5_flow_destination *dest2) { - return flow_act1->action != flow_act2->action || - dest1->vport.num != dest2->vport.num || - dest1->vport.vhca_id != dest2->vport.vhca_id || - memcmp(&flow_act1->vlan, &flow_act2->vlan, - sizeof(flow_act1->vlan)); + int ret; + + ret = flow_act1->action != 
flow_act2->action || + dest1->vport.num != dest2->vport.num || + dest1->vport.vhca_id != dest2->vport.vhca_id || + memcmp(&flow_act1->vlan, &flow_act2->vlan, + sizeof(flow_act1->vlan)); + if (ret) + return ret; + + return dest1->vport.pkt_reformat && dest2->vport.pkt_reformat ? + memcmp(dest1->vport.pkt_reformat, dest2->vport.pkt_reformat, + sizeof(*dest1->vport.pkt_reformat)) : 0; } static int @@ -49,7 +62,6 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, struct mlx5_termtbl_handle *tt, struct mlx5_flow_act *flow_act) { - static const struct mlx5_flow_spec spec = {}; struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_namespace *root_ns; int err; @@ -63,7 +75,8 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, /* As this is the terminating action then the termination table is the * same prio as the slow path */ - ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION; + ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | + MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; ft_attr.prio = FDB_SLOW_PATH; ft_attr.max_fte = 1; ft_attr.autogroup.max_num_groups = 1; @@ -73,9 +86,8 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, return -EOPNOTSUPP; } - tt->rule = mlx5_add_flow_rules(tt->termtbl, &spec, flow_act, + tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act, &tt->dest, 1); - if (IS_ERR(tt->rule)) { esw_warn(dev, "Failed to create termination table rule\n"); goto add_flow_err; @@ -93,7 +105,8 @@ add_flow_err: static struct mlx5_termtbl_handle * mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, struct mlx5_flow_act *flow_act, - struct mlx5_flow_destination *dest) + struct mlx5_flow_destination *dest, + struct mlx5_esw_flow_attr *attr) { struct mlx5_termtbl_handle *tt; bool found = false; @@ -101,7 +114,6 @@ mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, int err; mutex_lock(&esw->offloads.termtbl_mutex); - hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest); hash_for_each_possible(esw->offloads.termtbl_tbl, tt, termtbl_hlist, hash_key) 
{ @@ -123,6 +135,7 @@ mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; tt->dest.vport.num = dest->vport.num; tt->dest.vport.vhca_id = dest->vport.vhca_id; + tt->dest.vport.flags = dest->vport.flags; memcpy(&tt->flow_act, flow_act, sizeof(*flow_act)); err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act); @@ -157,31 +170,50 @@ mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, } } +static bool mlx5_eswitch_termtbl_is_encap_reformat(struct mlx5_pkt_reformat *rt) +{ + switch (rt->reformat_type) { + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: + case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + return true; + default: + return false; + } +} + static void mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, struct mlx5_flow_act *dst) { - if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)) - return; - - src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; - dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; - memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0])); - memset(&src->vlan[0], 0, sizeof(src->vlan[0])); - - if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2)) - return; + if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0])); + memset(&src->vlan[0], 0, sizeof(src->vlan[0])); + + if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1])); + memset(&src->vlan[1], 0, sizeof(src->vlan[1])); + } + } - src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; - dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; - memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1])); - 
memset(&src->vlan[1], 0, sizeof(src->vlan[1])); + if (src->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT && + mlx5_eswitch_termtbl_is_encap_reformat(src->pkt_reformat)) { + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + dst->pkt_reformat = src->pkt_reformat; + src->pkt_reformat = NULL; + } } static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, const struct mlx5_flow_spec *spec) { - u32 port_mask, port_value; + u16 port_mask, port_value; if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) return spec->flow_context.flow_source == @@ -191,20 +223,32 @@ static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, misc_parameters.source_port); port_value = MLX5_GET(fte_match_param, spec->match_value, misc_parameters.source_port); - return (port_mask & port_value & 0xffff) == MLX5_VPORT_UPLINK; + return (port_mask & port_value) == MLX5_VPORT_UPLINK; } bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr, struct mlx5_flow_act *flow_act, struct mlx5_flow_spec *spec) { - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table)) + int i; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) || + attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH || + !mlx5_eswitch_offload_is_uplink_port(esw, spec)) return false; /* push vlan on RX */ - return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) && - mlx5_eswitch_offload_is_uplink_port(esw, spec); + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) + return true; + + /* hairpin */ + for (i = attr->split_count; i < attr->out_count; i++) + if (attr->dests[i].rep->vport == MLX5_VPORT_UPLINK) + return true; + + return false; } struct mlx5_flow_handle * @@ -234,7 +278,7 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, /* get the terminating table for the action list */ tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, - &dest[i]); + 
&dest[i], attr); if (IS_ERR(tt)) { esw_warn(esw->dev, "Failed to create termination table\n"); goto revert_changes; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 4c61d25d2e88..b794888fa3ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -57,7 +57,7 @@ struct mlx5_fpga_ipsec_cmd_context { struct completion complete; struct mlx5_fpga_device *dev; struct list_head list; /* Item in pending_cmds */ - u8 command[0]; + u8 command[]; }; struct mlx5_fpga_esp_xfrm; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9dc24241dc91..62ce2b9417ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -110,9 +110,9 @@ #define ANCHOR_NUM_PRIOS 1 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) -#define OFFLOADS_MAX_FT 1 -#define OFFLOADS_NUM_PRIOS 1 -#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1) +#define OFFLOADS_MAX_FT 2 +#define OFFLOADS_NUM_PRIOS 2 +#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + OFFLOADS_NUM_PRIOS) #define LAG_PRIO_NUM_LEVELS 1 #define LAG_NUM_PRIOS 1 @@ -145,7 +145,7 @@ static struct init_tree_node { ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS, LAG_PRIO_NUM_LEVELS))), - ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, + ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, FS_CHAINING_CAPS, ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))), @@ -1322,7 +1322,7 @@ add_rule_fte(struct fs_fte *fte, fte->node.active = true; fte->status |= FS_FTE_STATUS_EXISTING; - atomic_inc(&fte->node.version); + atomic_inc(&fg->node.version); out: return handle; @@ -1577,28 +1577,19 @@ struct match_list { struct mlx5_flow_group *g; }; -struct match_list_head { - struct list_head list; - struct match_list first; -}; - -static void 
free_match_list(struct match_list_head *head, bool ft_locked) +static void free_match_list(struct match_list *head, bool ft_locked) { - if (!list_empty(&head->list)) { - struct match_list *iter, *match_tmp; + struct match_list *iter, *match_tmp; - list_del(&head->first.list); - tree_put_node(&head->first.g->node, ft_locked); - list_for_each_entry_safe(iter, match_tmp, &head->list, - list) { - tree_put_node(&iter->g->node, ft_locked); - list_del(&iter->list); - kfree(iter); - } + list_for_each_entry_safe(iter, match_tmp, &head->list, + list) { + tree_put_node(&iter->g->node, ft_locked); + list_del(&iter->list); + kfree(iter); } } -static int build_match_list(struct match_list_head *match_head, +static int build_match_list(struct match_list *match_head, struct mlx5_flow_table *ft, const struct mlx5_flow_spec *spec, bool ft_locked) @@ -1615,14 +1606,8 @@ static int build_match_list(struct match_list_head *match_head, rhl_for_each_entry_rcu(g, tmp, list, hash) { struct match_list *curr_match; - if (likely(list_empty(&match_head->list))) { - if (!tree_get_node(&g->node)) - continue; - match_head->first.g = g; - list_add_tail(&match_head->first.list, - &match_head->list); + if (unlikely(!tree_get_node(&g->node))) continue; - } curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); if (!curr_match) { @@ -1630,10 +1615,6 @@ static int build_match_list(struct match_list_head *match_head, err = -ENOMEM; goto out; } - if (!tree_get_node(&g->node)) { - kfree(curr_match); - continue; - } curr_match->g = g; list_add_tail(&curr_match->list, &match_head->list); } @@ -1699,7 +1680,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, struct match_list *iter; bool take_write = false; struct fs_fte *fte; - u64 version; + u64 version = 0; int err; fte = alloc_fte(ft, spec, flow_act); @@ -1707,10 +1688,12 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, return ERR_PTR(-ENOMEM); search_again_locked: - version = matched_fgs_get_version(match_head); if (flow_act->flags & 
FLOW_ACT_NO_APPEND) goto skip_search; - /* Try to find a fg that already contains a matching fte */ + version = matched_fgs_get_version(match_head); + /* Try to find an fte with identical match value and attempt update its + * action. + */ list_for_each_entry(iter, match_head, list) { struct fs_fte *fte_tmp; @@ -1738,10 +1721,12 @@ skip_search: goto out; } - /* Check the fgs version, for case the new FTE with the - * same values was added while the fgs weren't locked + /* Check the fgs version. If version have changed it could be that an + * FTE with the same match value was added while the fgs weren't + * locked. */ - if (version != matched_fgs_get_version(match_head)) { + if (!(flow_act->flags & FLOW_ACT_NO_APPEND) && + version != matched_fgs_get_version(match_head)) { take_write = true; goto search_again_locked; } @@ -1785,9 +1770,9 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, { struct mlx5_flow_steering *steering = get_steering(&ft->node); - struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; - struct match_list_head match_head; + struct match_list match_head; + struct mlx5_flow_group *g; bool take_write = false; struct fs_fte *fte; int version; @@ -1892,12 +1877,16 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft, int num_dest) { struct mlx5_flow_root_namespace *root = find_root(&ft->node); + static const struct mlx5_flow_spec zero_spec = {}; struct mlx5_flow_destination gen_dest = {}; struct mlx5_flow_table *next_ft = NULL; struct mlx5_flow_handle *handle = NULL; u32 sw_action = flow_act->action; struct fs_prio *prio; + if (!spec) + spec = &zero_spec; + fs_get_obj(prio, ft->node.parent); if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { if (!fwd_next_prio_supported(ft)) @@ -2700,6 +2689,17 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) goto out_err; } + /* We put this priority last, knowing that nothing will get here + * unless explicitly forwarded to. 
This is possible because the + * slow path tables have catch all rules and nothing gets passed + * those tables. + */ + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_PER_VPORT, 1); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + set_prio_attrs(steering->fdb_root_ns); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index ab69effb056d..f43caefd07a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -470,7 +470,7 @@ struct mlx5_fc_bulk { u32 base_id; int bulk_len; unsigned long *bitmask; - struct mlx5_fc fcs[0]; + struct mlx5_fc fcs[]; }; static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 909a7f284614..90e3d0233101 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -613,6 +613,44 @@ static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) fwhandle, 0); } +#define MLX5_FSM_REACTIVATE_TOUT 5000 /* msecs */ +static int mlx5_fsm_reactivate(struct mlxfw_dev *mlxfw_dev, u8 *status) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(MLX5_FSM_REACTIVATE_TOUT); + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + u32 out[MLX5_ST_SZ_DW(mirc_reg)]; + u32 in[MLX5_ST_SZ_DW(mirc_reg)]; + int err; + + if (!MLX5_CAP_MCAM_REG2(dev, mirc)) + return -EOPNOTSUPP; + + memset(in, 0, sizeof(in)); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MIRC, 0, 1); + if (err) + return err; + + do { + memset(out, 0, sizeof(out)); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MIRC, 0, 0); + if (err) + return 
err; + + *status = MLX5_GET(mirc_reg, out, status_code); + if (*status != MLXFW_FSM_REACTIVATE_STATUS_BUSY) + return 0; + + msleep(20); + } while (time_before(jiffies, exp_time)); + + return 0; +} + static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = { .component_query = mlx5_component_query, .fsm_lock = mlx5_fsm_lock, @@ -620,6 +658,7 @@ static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = { .fsm_block_download = mlx5_fsm_block_download, .fsm_component_verify = mlx5_fsm_component_verify, .fsm_activate = mlx5_fsm_activate, + .fsm_reactivate = mlx5_fsm_reactivate, .fsm_query_state = mlx5_fsm_query_state, .fsm_cancel = mlx5_fsm_cancel, .fsm_release = mlx5_fsm_release @@ -634,6 +673,7 @@ int mlx5_firmware_flash(struct mlx5_core_dev *dev, .ops = &mlx5_mlxfw_dev_ops, .psid = dev->board_id, .psid_size = strlen(dev->board_id), + .devlink = priv_to_devlink(dev), }, .mlx5_core_dev = dev }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index d9f4e8c59c1f..fa1665caac46 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -627,7 +627,7 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) health->fw_reporter = devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops, - 0, false, dev); + 0, dev); if (IS_ERR(health->fw_reporter)) mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", PTR_ERR(health->fw_reporter)); @@ -636,7 +636,7 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) devlink_health_reporter_create(devlink, &mlx5_fw_fatal_reporter_ops, MLX5_REPORTER_FW_GRACEFUL_PERIOD, - true, dev); + dev); if (IS_ERR(health->fw_fatal_reporter)) mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", PTR_ERR(health->fw_fatal_reporter)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 
90cb50fe17fd..1eef66ee849e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -235,6 +235,9 @@ static int mlx5i_get_link_ksettings(struct net_device *netdev, } const struct ethtool_ops mlx5i_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = mlx5i_get_drvinfo, .get_strings = mlx5i_get_strings, .get_sset_count = mlx5i_get_sset_count, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 56078b23f1a0..673aaa815f57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -483,7 +483,7 @@ static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu) new_channels.params = *params; new_channels.params.sw_mtu = new_mtu; - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index c87962cab921..de7e01a027bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -56,7 +56,7 @@ struct mlx5i_priv { u32 qkey; u16 pkey_index; struct mlx5i_pkey_qpn_ht *qpn_htbl; - char *mlx5e_priv[0]; + char *mlx5e_priv[]; }; int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn); @@ -107,7 +107,7 @@ struct mlx5i_tx_wqe { struct mlx5_wqe_datagram_seg datagram; struct mlx5_wqe_eth_pad pad; struct mlx5_wqe_eth_seg eth; - struct mlx5_wqe_data_seg data[0]; + struct mlx5_wqe_data_seg data[]; }; static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c 
b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 416676c35b1f..e9089a793632 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -93,9 +93,8 @@ static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, static void mlx5_lag_fib_event_flush(struct notifier_block *nb) { struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb); - struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp); - flush_workqueue(ldev->wq); + flush_workqueue(mp->wq); } struct mlx5_fib_event_work { @@ -293,7 +292,7 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, return NOTIFY_DONE; } - queue_work(ldev->wq, &fib_work->work); + queue_work(mp->wq, &fib_work->work); return NOTIFY_DONE; } @@ -306,11 +305,17 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev) if (mp->fib_nb.notifier_call) return 0; + mp->wq = create_singlethread_workqueue("mlx5_lag_mp"); + if (!mp->wq) + return -ENOMEM; + mp->fib_nb.notifier_call = mlx5_lag_fib_event; err = register_fib_notifier(&init_net, &mp->fib_nb, mlx5_lag_fib_event_flush, NULL); - if (err) + if (err) { + destroy_workqueue(mp->wq); mp->fib_nb.notifier_call = NULL; + } return err; } @@ -323,5 +328,6 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) return; unregister_fib_notifier(&init_net, &mp->fib_nb); + destroy_workqueue(mp->wq); mp->fib_nb.notifier_call = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h index 79be89e9c7a4..258ac7b2964e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h @@ -16,6 +16,7 @@ enum mlx5_lag_port_affinity { struct lag_mp { struct notifier_block fib_nb; struct fib_info *mfi; /* used in tracking fib events */ + struct workqueue_struct *wq; }; #ifdef CONFIG_MLX5_ESWITCH diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c index 
3fc575d1c3ec..dcea87ec5977 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -42,7 +42,7 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size); MLX5_SET(encryption_key_obj, obj, key_type, - MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK); + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS); MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c index e065c2f68f5a..6cbccba56f70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c @@ -21,7 +21,7 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) struct mlx5_dm *dm; if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)) - return 0; + return NULL; dm = kzalloc(sizeof(*dm), GFP_KERNEL); if (!dm) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f554cfddcf4e..7af4210c1b96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -70,6 +70,7 @@ #include "diag/fw_tracer.h" #include "ecpf.h" #include "lib/hv_vhca.h" +#include "diag/rsc_dump.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -880,6 +881,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) dev->tracer = mlx5_fw_tracer_create(dev); dev->hv_vhca = mlx5_hv_vhca_create(dev); + dev->rsc_dump = mlx5_rsc_dump_create(dev); return 0; @@ -909,6 +911,7 @@ err_devcom: static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { + mlx5_rsc_dump_destroy(dev); mlx5_hv_vhca_destroy(dev->hv_vhca); 
mlx5_fw_tracer_destroy(dev->tracer); mlx5_dm_cleanup(dev); @@ -1079,6 +1082,12 @@ static int mlx5_load(struct mlx5_core_dev *dev) mlx5_hv_vhca_init(dev->hv_vhca); + err = mlx5_rsc_dump_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init Resource dump\n"); + goto err_rsc_dump; + } + err = mlx5_fpga_device_start(dev); if (err) { mlx5_core_err(dev, "fpga device start failed %d\n", err); @@ -1134,6 +1143,8 @@ err_tls_start: err_ipsec_start: mlx5_fpga_device_stop(dev); err_fpga_start: + mlx5_rsc_dump_cleanup(dev); +err_rsc_dump: mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_tracer_cleanup(dev->tracer); err_fw_tracer: @@ -1155,6 +1166,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_accel_ipsec_cleanup(dev); mlx5_accel_tls_cleanup(dev); mlx5_fpga_device_stop(dev); + mlx5_rsc_dump_cleanup(dev); mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); @@ -1199,15 +1211,10 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) goto err_devlink_reg; } - if (mlx5_device_registered(dev)) { + if (mlx5_device_registered(dev)) mlx5_attach_device(dev); - } else { - err = mlx5_register_device(dev); - if (err) { - mlx5_core_err(dev, "register device failed %d\n", err); - goto err_reg_dev; - } - } + else + mlx5_register_device(dev); set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); out: @@ -1215,9 +1222,6 @@ out: return err; -err_reg_dev: - if (boot) - mlx5_devlink_unregister(priv_to_devlink(dev)); err_devlink_reg: mlx5_unload(dev); err_load: @@ -1231,7 +1235,7 @@ function_teardown: return err; } -int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) +void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) { if (cleanup) { mlx5_unregister_device(dev); @@ -1260,7 +1264,6 @@ int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) mlx5_function_teardown(dev, cleanup); out: mutex_unlock(&dev->intf_state_mutex); - return 0; } static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) @@ 
-1282,7 +1285,6 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) mutex_init(&priv->alloc_mutex); mutex_init(&priv->pgdir_mutex); INIT_LIST_HEAD(&priv->pgdir_list); - spin_lock_init(&priv->mkey_lock); priv->dbg_root = debugfs_create_dir(dev_name(dev->device), mlx5_debugfs_root); @@ -1381,12 +1383,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_crdump_disable(dev); mlx5_devlink_unregister(devlink); - if (mlx5_unload_one(dev, true)) { - mlx5_core_err(dev, "mlx5_unload_one failed\n"); - mlx5_health_flush(dev); - return; - } - + mlx5_unload_one(dev, true); mlx5_pci_close(dev); mlx5_mdev_uninit(dev); mlx5_devlink_free(devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index da67b28d6e23..a8fb43a85d1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -182,7 +182,7 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv); void mlx5_attach_device(struct mlx5_core_dev *dev); void mlx5_detach_device(struct mlx5_core_dev *dev); bool mlx5_device_registered(struct mlx5_core_dev *dev); -int mlx5_register_device(struct mlx5_core_dev *dev); +void mlx5_register_device(struct mlx5_core_dev *dev); void mlx5_unregister_device(struct mlx5_core_dev *dev); void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); @@ -244,6 +244,6 @@ enum { u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state); -int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup); +void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup); int mlx5_load_one(struct mlx5_core_dev *dev, bool boot); #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 42cc3c7ac5b6..366f2cbfc6db 
100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -36,54 +36,31 @@ #include <linux/mlx5/cmd.h> #include "mlx5_core.h" -int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - struct mlx5_async_ctx *async_ctx, u32 *in, - int inlen, u32 *out, int outlen, - mlx5_async_cbk_t callback, - struct mlx5_async_work *context) +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, + u32 *in, int inlen) { u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; u32 mkey_index; void *mkc; int err; - u8 key; - - spin_lock_irq(&dev->priv.mkey_lock); - key = dev->priv.mkey_key++; - spin_unlock_irq(&dev->priv.mkey_lock); - mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); - MLX5_SET(mkc, mkc, mkey_7_0, key); - - if (callback) - return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, - callback, context); err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); if (err) return err; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); mkey->iova = MLX5_GET64(mkc, mkc, start_addr); mkey->size = MLX5_GET64(mkc, mkc, len); - mkey->key = mlx5_idx_to_mkey(mkey_index) | key; + mkey->key |= mlx5_idx_to_mkey(mkey_index); mkey->pd = MLX5_GET(mkc, mkc, pd); - mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", - mkey_index, key, mkey->key); + mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, mkey->key); return 0; } -EXPORT_SYMBOL(mlx5_core_create_mkey_cb); - -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - u32 *in, int inlen) -{ - return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, - NULL, 0, NULL, NULL); -} EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c 
b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index 01c380425f9d..f3b29d9ade1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -101,22 +101,39 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +static bool mlx5_rl_are_equal_raw(struct mlx5_rl_entry *entry, void *rl_in, + u16 uid) +{ + return (!memcmp(entry->rl_raw, rl_in, sizeof(entry->rl_raw)) && + entry->uid == uid); +} + /* Finds an entry where we can register the given rate * If the rate already exists, return the entry where it is registered, * otherwise return the first available entry. * If the table is full, return NULL */ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, - struct mlx5_rate_limit *rl) + void *rl_in, u16 uid, bool dedicated) { struct mlx5_rl_entry *ret_entry = NULL; bool empty_found = false; int i; for (i = 0; i < table->max_size; i++) { - if (mlx5_rl_are_equal(&table->rl_entry[i].rl, rl)) - return &table->rl_entry[i]; - if (!empty_found && !table->rl_entry[i].rl.rate) { + if (dedicated) { + if (!table->rl_entry[i].refcount) + return &table->rl_entry[i]; + continue; + } + + if (table->rl_entry[i].refcount) { + if (table->rl_entry[i].dedicated) + continue; + if (mlx5_rl_are_equal_raw(&table->rl_entry[i], rl_in, + uid)) + return &table->rl_entry[i]; + } else if (!empty_found) { empty_found = true; ret_entry = &table->rl_entry[i]; } @@ -126,18 +143,19 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, } static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev, - u16 index, - struct mlx5_rate_limit *rl) + struct mlx5_rl_entry *entry, bool set) { - u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {}; + void *pp_context; + 
pp_context = MLX5_ADDR_OF(set_pp_rate_limit_in, in, ctx); MLX5_SET(set_pp_rate_limit_in, in, opcode, MLX5_CMD_OP_SET_PP_RATE_LIMIT); - MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index); - MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rl->rate); - MLX5_SET(set_pp_rate_limit_in, in, burst_upper_bound, rl->max_burst_sz); - MLX5_SET(set_pp_rate_limit_in, in, typical_packet_size, rl->typical_pkt_sz); + MLX5_SET(set_pp_rate_limit_in, in, uid, entry->uid); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, entry->index); + if (set) + memcpy(pp_context, entry->rl_raw, sizeof(entry->rl_raw)); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -158,23 +176,25 @@ bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0, } EXPORT_SYMBOL(mlx5_rl_are_equal); -int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, - struct mlx5_rate_limit *rl) +int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid, + bool dedicated_entry, u16 *index) { struct mlx5_rl_table *table = &dev->priv.rl_table; struct mlx5_rl_entry *entry; int err = 0; + u32 rate; + rate = MLX5_GET(set_pp_rate_limit_context, rl_in, rate_limit); mutex_lock(&table->rl_lock); - if (!rl->rate || !mlx5_rl_is_in_range(dev, rl->rate)) { + if (!rate || !mlx5_rl_is_in_range(dev, rate)) { mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n", - rl->rate, table->min_rate, table->max_rate); + rate, table->min_rate, table->max_rate); err = -EINVAL; goto out; } - entry = find_rl_entry(table, rl); + entry = find_rl_entry(table, rl_in, uid, dedicated_entry); if (!entry) { mlx5_core_err(dev, "Max number of %u rates reached\n", table->max_size); @@ -185,16 +205,24 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, /* rate already configured */ entry->refcount++; } else { + memcpy(entry->rl_raw, rl_in, sizeof(entry->rl_raw)); + entry->uid = uid; /* new rate limit */ - err = mlx5_set_pp_rate_limit_cmd(dev, entry->index, rl); + err = mlx5_set_pp_rate_limit_cmd(dev, entry, 
true); if (err) { - mlx5_core_err(dev, "Failed configuring rate limit(err %d): rate %u, max_burst_sz %u, typical_pkt_sz %u\n", - err, rl->rate, rl->max_burst_sz, - rl->typical_pkt_sz); + mlx5_core_err( + dev, + "Failed configuring rate limit(err %d): rate %u, max_burst_sz %u, typical_pkt_sz %u\n", + err, rate, + MLX5_GET(set_pp_rate_limit_context, rl_in, + burst_upper_bound), + MLX5_GET(set_pp_rate_limit_context, rl_in, + typical_packet_size)); goto out; } - entry->rl = *rl; + entry->refcount = 1; + entry->dedicated = dedicated_entry; } *index = entry->index; @@ -202,20 +230,61 @@ out: mutex_unlock(&table->rl_lock); return err; } +EXPORT_SYMBOL(mlx5_rl_add_rate_raw); + +void mlx5_rl_remove_rate_raw(struct mlx5_core_dev *dev, u16 index) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry; + + mutex_lock(&table->rl_lock); + entry = &table->rl_entry[index - 1]; + entry->refcount--; + if (!entry->refcount) + /* need to remove rate */ + mlx5_set_pp_rate_limit_cmd(dev, entry, false); + mutex_unlock(&table->rl_lock); +} +EXPORT_SYMBOL(mlx5_rl_remove_rate_raw); + +int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, + struct mlx5_rate_limit *rl) +{ + u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {}; + + MLX5_SET(set_pp_rate_limit_context, rl_raw, rate_limit, rl->rate); + MLX5_SET(set_pp_rate_limit_context, rl_raw, burst_upper_bound, + rl->max_burst_sz); + MLX5_SET(set_pp_rate_limit_context, rl_raw, typical_packet_size, + rl->typical_pkt_sz); + + return mlx5_rl_add_rate_raw(dev, rl_raw, + MLX5_CAP_QOS(dev, packet_pacing_uid) ? 
+ MLX5_SHARED_RESOURCE_UID : 0, + false, index); +} EXPORT_SYMBOL(mlx5_rl_add_rate); void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl) { + u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {}; struct mlx5_rl_table *table = &dev->priv.rl_table; struct mlx5_rl_entry *entry = NULL; - struct mlx5_rate_limit reset_rl = {0}; /* 0 is a reserved value for unlimited rate */ if (rl->rate == 0) return; + MLX5_SET(set_pp_rate_limit_context, rl_raw, rate_limit, rl->rate); + MLX5_SET(set_pp_rate_limit_context, rl_raw, burst_upper_bound, + rl->max_burst_sz); + MLX5_SET(set_pp_rate_limit_context, rl_raw, typical_packet_size, + rl->typical_pkt_sz); + mutex_lock(&table->rl_lock); - entry = find_rl_entry(table, rl); + entry = find_rl_entry(table, rl_raw, + MLX5_CAP_QOS(dev, packet_pacing_uid) ? + MLX5_SHARED_RESOURCE_UID : 0, false); if (!entry || !entry->refcount) { mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u are not configured\n", rl->rate, rl->max_burst_sz, rl->typical_pkt_sz); @@ -223,11 +292,9 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl) } entry->refcount--; - if (!entry->refcount) { + if (!entry->refcount) /* need to remove rate */ - mlx5_set_pp_rate_limit_cmd(dev, entry->index, &reset_rl); - entry->rl = reset_rl; - } + mlx5_set_pp_rate_limit_cmd(dev, entry, false); out: mutex_unlock(&table->rl_lock); @@ -273,14 +340,13 @@ int mlx5_init_rl_table(struct mlx5_core_dev *dev) void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev) { struct mlx5_rl_table *table = &dev->priv.rl_table; - struct mlx5_rate_limit rl = {0}; int i; /* Clear all configured rates */ for (i = 0; i < table->max_size; i++) - if (table->rl_entry[i].rl.rate) - mlx5_set_pp_rate_limit_cmd(dev, table->rl_entry[i].index, - &rl); + if (table->rl_entry[i].refcount) + mlx5_set_pp_rate_limit_cmd(dev, &table->rl_entry[i], + false); kfree(dev->priv.rl_table.rl_entry); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c 
b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 03f037811f1d..3094d20297a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -77,8 +77,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) if (!MLX5_ESWITCH_MANAGER(dev)) goto enable_vfs_hca; - mlx5_eswitch_update_num_of_vfs(dev->priv.eswitch, num_vfs); - err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY); + err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs); if (err) { mlx5_core_warn(dev, "failed to enable eswitch SRIOV (%d)\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 2d93228ff633..554811de4c9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -672,7 +672,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, dest_action = action; if (!action->dest_tbl.is_fw_tbl) { if (action->dest_tbl.tbl->dmn != dmn) { - mlx5dr_dbg(dmn, + mlx5dr_err(dmn, "Destination table belongs to a different domain\n"); goto out_invalid_arg; } @@ -703,7 +703,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, action->dest_tbl.fw_tbl.rx_icm_addr = output.sw_owner_icm_root_0; } else { - mlx5dr_dbg(dmn, + mlx5dr_err(dmn, "Failed mlx5_cmd_query_flow_table ret: %d\n", ret); return ret; @@ -772,7 +772,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, /* Check action duplication */ if (++action_type_set[action_type] > max_actions_type) { - mlx5dr_dbg(dmn, "Action type %d supports only max %d time(s)\n", + mlx5dr_err(dmn, "Action type %d supports only max %d time(s)\n", action_type, max_actions_type); goto out_invalid_arg; } @@ -781,7 +781,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, if (dr_action_validate_and_get_next_state(action_domain, action_type, 
&state)) { - mlx5dr_dbg(dmn, "Invalid action sequence provided\n"); + mlx5dr_err(dmn, "Invalid action sequence provided\n"); return -EOPNOTSUPP; } } @@ -797,7 +797,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, rx_rule && recalc_cs_required && dest_action) { ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr); if (ret) { - mlx5dr_dbg(dmn, + mlx5dr_err(dmn, "Failed to handle checksum recalculation err %d\n", ret); return ret; @@ -964,6 +964,24 @@ struct mlx5dr_action *mlx5dr_action_create_drop(void) } struct mlx5dr_action * +mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + return NULL; + + action->dest_tbl.is_fw_tbl = true; + action->dest_tbl.fw_tbl.dmn = dmn; + action->dest_tbl.fw_tbl.id = table_num; + action->dest_tbl.fw_tbl.type = FS_FT_FDB; + refcount_inc(&dmn->refcount); + + return action; +} + +struct mlx5dr_action * mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl) { struct mlx5dr_action *action; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index a9da961d4d2f..48b6358b6845 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -59,7 +59,7 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn) ret = mlx5_core_alloc_pd(dmn->mdev, &dmn->pdn); if (ret) { - mlx5dr_dbg(dmn, "Couldn't allocate PD\n"); + mlx5dr_err(dmn, "Couldn't allocate PD, ret: %d", ret); return ret; } @@ -192,7 +192,7 @@ static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev, ret = dr_domain_query_vports(dmn); if (ret) { - mlx5dr_dbg(dmn, "Failed to query vports caps\n"); + mlx5dr_err(dmn, "Failed to query vports caps (err: %d)", ret); goto free_vports_caps; } @@ -213,7 +213,7 @@ static int 
dr_domain_caps_init(struct mlx5_core_dev *mdev, int ret; if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) { - mlx5dr_dbg(dmn, "Failed to allocate domain, bad link type\n"); + mlx5dr_err(dmn, "Failed to allocate domain, bad link type\n"); return -EOPNOTSUPP; } @@ -257,7 +257,7 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev, dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX; vport_cap = mlx5dr_get_vport_cap(&dmn->info.caps, 0); if (!vport_cap) { - mlx5dr_dbg(dmn, "Failed to get esw manager vport\n"); + mlx5dr_err(dmn, "Failed to get esw manager vport\n"); return -ENOENT; } @@ -268,7 +268,7 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev, dmn->info.tx.drop_icm_addr = dmn->info.caps.esw_tx_drop_address; break; default: - mlx5dr_dbg(dmn, "Invalid domain\n"); + mlx5dr_err(dmn, "Invalid domain\n"); ret = -EINVAL; break; } @@ -300,7 +300,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) mutex_init(&dmn->mutex); if (dr_domain_caps_init(mdev, dmn)) { - mlx5dr_dbg(dmn, "Failed init domain, no caps\n"); + mlx5dr_err(dmn, "Failed init domain, no caps\n"); goto free_domain; } @@ -348,8 +348,11 @@ int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags) mutex_lock(&dmn->mutex); ret = mlx5dr_send_ring_force_drain(dmn); mutex_unlock(&dmn->mutex); - if (ret) + if (ret) { + mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n", + flags, ret); return ret; + } } if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index d7c7467e2d53..30d2d7376f56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -468,7 +468,7 @@ mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, err = mlx5dr_cmd_sync_steering(pool->dmn->mdev); if (err) { dr_icm_chill_buckets_abort(pool, bucket, buckets); - 
mlx5dr_dbg(pool->dmn, "Sync_steering failed\n"); + mlx5dr_err(pool->dmn, "Sync_steering failed\n"); chunk = NULL; goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index c6dbd856df94..a95938874798 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -388,14 +388,14 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx); if (idx == 0) { - mlx5dr_dbg(dmn, "Cannot generate any valid rules from mask\n"); + mlx5dr_err(dmn, "Cannot generate any valid rules from mask\n"); return -EINVAL; } /* Check that all mask fields were consumed */ for (i = 0; i < sizeof(struct mlx5dr_match_param); i++) { if (((u8 *)&mask)[i] != 0) { - mlx5dr_info(dmn, "Mask contains unsupported parameters\n"); + mlx5dr_err(dmn, "Mask contains unsupported parameters\n"); return -EOPNOTSUPP; } } @@ -563,7 +563,7 @@ static int dr_matcher_set_all_ste_builders(struct mlx5dr_matcher *matcher, dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV6); if (!nic_matcher->ste_builder) { - mlx5dr_dbg(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n"); + mlx5dr_err(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n"); return -EINVAL; } @@ -634,13 +634,13 @@ static int dr_matcher_init(struct mlx5dr_matcher *matcher, int ret; if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) { - mlx5dr_info(dmn, "Invalid match criteria attribute\n"); + mlx5dr_err(dmn, "Invalid match criteria attribute\n"); return -EINVAL; } if (mask) { if (mask->match_sz > sizeof(struct mlx5dr_match_param)) { - mlx5dr_info(dmn, "Invalid match size attribute\n"); + mlx5dr_err(dmn, "Invalid match size attribute\n"); return -EINVAL; } mlx5dr_ste_copy_param(matcher->match_criteria, @@ -671,7 +671,7 @@ static int dr_matcher_init(struct mlx5dr_matcher 
*matcher, struct mlx5dr_matcher * mlx5dr_matcher_create(struct mlx5dr_table *tbl, - u16 priority, + u32 priority, u8 match_criteria_enable, struct mlx5dr_match_parameters *mask) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index e4cff7abb348..cce3ee7a6614 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -826,8 +826,8 @@ again: ste_location, send_ste_list); if (!new_htbl) { mlx5dr_htbl_put(cur_htbl); - mlx5dr_info(dmn, "failed creating rehash table, htbl-log_size: %d\n", - cur_htbl->chunk_size); + mlx5dr_err(dmn, "Failed creating rehash table, htbl-log_size: %d\n", + cur_htbl->chunk_size); } else { cur_htbl = new_htbl; } @@ -877,7 +877,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, if (!value_size || (value_size > sizeof(struct mlx5dr_match_param) || (value_size % sizeof(u32)))) { - mlx5dr_dbg(matcher->tbl->dmn, "Rule parameters length is incorrect\n"); + mlx5dr_err(matcher->tbl->dmn, "Rule parameters length is incorrect\n"); return false; } @@ -888,7 +888,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, e_idx = min(s_idx + sizeof(param->outer), value_size); if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { - mlx5dr_dbg(matcher->tbl->dmn, "Rule outer parameters contains a value not specified by mask\n"); + mlx5dr_err(matcher->tbl->dmn, "Rule outer parameters contains a value not specified by mask\n"); return false; } } @@ -898,7 +898,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, e_idx = min(s_idx + sizeof(param->misc), value_size); if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { - mlx5dr_dbg(matcher->tbl->dmn, "Rule misc parameters contains a value not specified by mask\n"); + mlx5dr_err(matcher->tbl->dmn, "Rule misc parameters contains a value not specified by mask\n"); return false; } } @@ -908,7 +908,7 @@ 
static bool dr_rule_verify(struct mlx5dr_matcher *matcher, e_idx = min(s_idx + sizeof(param->inner), value_size); if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { - mlx5dr_dbg(matcher->tbl->dmn, "Rule inner parameters contains a value not specified by mask\n"); + mlx5dr_err(matcher->tbl->dmn, "Rule inner parameters contains a value not specified by mask\n"); return false; } } @@ -918,7 +918,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, e_idx = min(s_idx + sizeof(param->misc2), value_size); if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { - mlx5dr_dbg(matcher->tbl->dmn, "Rule misc2 parameters contains a value not specified by mask\n"); + mlx5dr_err(matcher->tbl->dmn, "Rule misc2 parameters contains a value not specified by mask\n"); return false; } } @@ -928,7 +928,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, e_idx = min(s_idx + sizeof(param->misc3), value_size); if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { - mlx5dr_dbg(matcher->tbl->dmn, "Rule misc3 parameters contains a value not specified by mask\n"); + mlx5dr_err(matcher->tbl->dmn, "Rule misc3 parameters contains a value not specified by mask\n"); return false; } } @@ -1221,7 +1221,7 @@ remove_action_members: dr_rule_remove_action_members(rule); free_rule: kfree(rule); - mlx5dr_info(dmn, "Failed creating rule\n"); + mlx5dr_err(dmn, "Failed creating rule\n"); return NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 095ec7b1399d..c0ab9cf74929 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -136,7 +136,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq, &dr_qp->wq_ctrl); if (err) { - mlx5_core_info(mdev, "Can't create QP WQ\n"); + mlx5_core_warn(mdev, "Can't 
create QP WQ\n"); goto err_wq; } @@ -652,8 +652,10 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) /* Init */ ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port); - if (ret) + if (ret) { + mlx5dr_err(dmn, "Failed modify QP rst2init\n"); return ret; + } /* RTR */ ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr); @@ -668,8 +670,10 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp; ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr); - if (ret) + if (ret) { + mlx5dr_err(dmn, "Failed modify QP init2rtr\n"); return ret; + } /* RTS */ rts_attr.timeout = 14; @@ -677,8 +681,10 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) rts_attr.rnr_retry = 7; ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr); - if (ret) + if (ret) { + mlx5dr_err(dmn, "Failed modify QP rtr2rts\n"); return ret; + } return 0; } @@ -862,6 +868,7 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) cq_size = QUEUE_SIZE + 1; dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size); if (!dmn->send_ring->cq) { + mlx5dr_err(dmn, "Failed creating CQ\n"); ret = -ENOMEM; goto free_send_ring; } @@ -873,6 +880,7 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr); if (!dmn->send_ring->qp) { + mlx5dr_err(dmn, "Failed creating QP\n"); ret = -ENOMEM; goto clean_cq; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index aade62a9ee5c..c0e3a1e7389d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -728,7 +728,7 @@ int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, { if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) { if (mask->misc.source_port && mask->misc.source_port != 0xffff) { - mlx5dr_dbg(dmn, "Partial mask 
source_port is not supported\n"); + mlx5dr_err(dmn, "Partial mask source_port is not supported\n"); return -EINVAL; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index 14ce2d7dbb66..c2fe48d7b75a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -128,16 +128,20 @@ static int dr_table_init_nic(struct mlx5dr_domain *dmn, DR_CHUNK_SIZE_1, MLX5DR_STE_LU_TYPE_DONT_CARE, 0); - if (!nic_tbl->s_anchor) + if (!nic_tbl->s_anchor) { + mlx5dr_err(dmn, "Failed allocating htbl\n"); return -ENOMEM; + } info.type = CONNECT_MISS; info.miss_icm_addr = nic_dmn->default_icm_addr; ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, nic_tbl->s_anchor, &info, true); - if (ret) + if (ret) { + mlx5dr_err(dmn, "Failed int and send htbl\n"); goto free_s_anchor; + } mlx5dr_htbl_get(nic_tbl->s_anchor); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index dffe35145d19..3fa739951b34 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -705,7 +705,7 @@ struct mlx5dr_matcher { struct mlx5dr_matcher_rx_tx rx; struct mlx5dr_matcher_rx_tx tx; struct list_head matcher_list; - u16 prio; + u32 prio; struct mlx5dr_match_param mask; u8 match_criteria; refcount_t refcount; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index c2027192e21e..3b3f5b9d4f95 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -140,7 +140,7 @@ static int mlx5_cmd_dr_create_flow_group(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_group *fg) { struct mlx5dr_matcher *matcher; - u16 priority = 
MLX5_GET(create_flow_group_in, in, + u32 priority = MLX5_GET(create_flow_group_in, in, start_flow_index); u8 match_criteria_enable = MLX5_GET(create_flow_group_in, in, @@ -384,6 +384,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { list_for_each_entry(dst, &fte->node.children, node.list) { enum mlx5_flow_destination_type type = dst->dest_attr.type; + u32 ft_id; if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || num_term_actions >= MLX5_FLOW_CONTEXT_ACTION_MAX) { @@ -420,6 +421,17 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, num_term_actions++; break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + ft_id = dst->dest_attr.ft_num; + tmp_action = mlx5dr_action_create_dest_table_num(domain, + ft_id); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; + break; default: err = -EOPNOTSUPP; goto free_actions; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index e1edc9c247b7..7deaca9ade3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -38,8 +38,6 @@ struct mlx5dr_action_dest { struct mlx5dr_action *reformat; }; -#ifdef CONFIG_MLX5_SW_STEERING - struct mlx5dr_domain * mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type); @@ -59,7 +57,7 @@ u32 mlx5dr_table_get_id(struct mlx5dr_table *table); struct mlx5dr_matcher * mlx5dr_matcher_create(struct mlx5dr_table *table, - u16 priority, + u32 priority, u8 match_criteria_enable, struct mlx5dr_match_parameters *mask); @@ -77,6 +75,9 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, struct mlx5dr_action *action); struct mlx5dr_action * +mlx5dr_action_create_dest_table_num(struct mlx5dr_domain 
*dmn, u32 table_num); + +struct mlx5dr_action * mlx5dr_action_create_dest_table(struct mlx5dr_table *table); struct mlx5dr_action * @@ -125,103 +126,4 @@ mlx5dr_is_supported(struct mlx5_core_dev *dev) return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner); } -#else /* CONFIG_MLX5_SW_STEERING */ - -static inline struct mlx5dr_domain * -mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) { return NULL; } - -static inline int -mlx5dr_domain_destroy(struct mlx5dr_domain *domain) { return 0; } - -static inline int -mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags) { return 0; } - -static inline void -mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, - struct mlx5dr_domain *peer_dmn) { } - -static inline struct mlx5dr_table * -mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags) { return NULL; } - -static inline int -mlx5dr_table_destroy(struct mlx5dr_table *table) { return 0; } - -static inline u32 -mlx5dr_table_get_id(struct mlx5dr_table *table) { return 0; } - -static inline struct mlx5dr_matcher * -mlx5dr_matcher_create(struct mlx5dr_table *table, - u16 priority, - u8 match_criteria_enable, - struct mlx5dr_match_parameters *mask) { return NULL; } - -static inline int -mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) { return 0; } - -static inline struct mlx5dr_rule * -mlx5dr_rule_create(struct mlx5dr_matcher *matcher, - struct mlx5dr_match_parameters *value, - size_t num_actions, - struct mlx5dr_action *actions[]) { return NULL; } - -static inline int -mlx5dr_rule_destroy(struct mlx5dr_rule *rule) { return 0; } - -static inline int -mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, - struct mlx5dr_action *action) { return 0; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_dest_table(struct mlx5dr_table *table) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain, - struct mlx5_flow_table *ft) { return NULL; } - -static 
inline struct mlx5dr_action * -mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, - u32 vport, u8 vhca_id_valid, - u16 vhca_id) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, - struct mlx5dr_action_dest *dests, - u32 num_of_dests) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_drop(void) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_tag(u32 tag_value) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_flow_counter(u32 counter_id) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, - enum mlx5dr_action_reformat_type reformat_type, - size_t data_sz, - void *data) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain, - u32 flags, - size_t actions_sz, - __be64 actions[]) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_pop_vlan(void) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, - __be32 vlan_hdr) { return NULL; } - -static inline int -mlx5dr_action_destroy(struct mlx5dr_action *action) { return 0; } - -static inline bool -mlx5dr_is_supported(struct mlx5_core_dev *dev) { return false; } - -#endif /* CONFIG_MLX5_SW_STEERING */ - #endif /* _MLX5DR_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlxfw/Kconfig b/drivers/net/ethernet/mellanox/mlxfw/Kconfig index 0367f835a846..5b604501f33e 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxfw/Kconfig @@ -12,3 +12,4 @@ config MLXFW To compile this driver as a module, choose M here: the module will be called mlxfw. 
select XZ_DEC + select NET_DEVLINK diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h index c50e74ab02c4..7654841a05c2 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h @@ -6,6 +6,30 @@ #include <linux/firmware.h> #include <linux/netlink.h> +#include <linux/device.h> +#include <net/devlink.h> + +struct mlxfw_dev { + const struct mlxfw_dev_ops *ops; + const char *psid; + u16 psid_size; + struct devlink *devlink; +}; + +static inline +struct device *mlxfw_dev_dev(struct mlxfw_dev *mlxfw_dev) +{ + return mlxfw_dev->devlink->dev; +} + +#define MLXFW_PRFX "mlxfw: " + +#define mlxfw_info(mlxfw_dev, fmt, ...) \ + dev_info(mlxfw_dev_dev(mlxfw_dev), MLXFW_PRFX fmt, ## __VA_ARGS__) +#define mlxfw_err(mlxfw_dev, fmt, ...) \ + dev_err(mlxfw_dev_dev(mlxfw_dev), MLXFW_PRFX fmt, ## __VA_ARGS__) +#define mlxfw_dbg(mlxfw_dev, fmt, ...) \ + dev_dbg(mlxfw_dev_dev(mlxfw_dev), MLXFW_PRFX fmt, ## __VA_ARGS__) enum mlxfw_fsm_state { MLXFW_FSM_STATE_IDLE, @@ -31,7 +55,19 @@ enum mlxfw_fsm_state_err { MLXFW_FSM_STATE_ERR_MAX, }; -struct mlxfw_dev; +enum mlxfw_fsm_reactivate_status { + MLXFW_FSM_REACTIVATE_STATUS_OK, + MLXFW_FSM_REACTIVATE_STATUS_BUSY, + MLXFW_FSM_REACTIVATE_STATUS_PROHIBITED_FW_VER_ERR, + MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_COPY_FAILED, + MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_ERASE_FAILED, + MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_RESTORE_FAILED, + MLXFW_FSM_REACTIVATE_STATUS_CANDIDATE_FW_DEACTIVATION_FAILED, + MLXFW_FSM_REACTIVATE_STATUS_FW_ALREADY_ACTIVATED, + MLXFW_FSM_REACTIVATE_STATUS_ERR_DEVICE_RESET_REQUIRED, + MLXFW_FSM_REACTIVATE_STATUS_ERR_FW_PROGRAMMING_NEEDED, + MLXFW_FSM_REACTIVATE_STATUS_MAX, +}; struct mlxfw_dev_ops { int (*component_query)(struct mlxfw_dev *mlxfw_dev, u16 component_index, @@ -51,6 +87,8 @@ struct mlxfw_dev_ops { int (*fsm_activate)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); + int (*fsm_reactivate)(struct mlxfw_dev *mlxfw_dev, u8 
*status); + int (*fsm_query_state)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, enum mlxfw_fsm_state *fsm_state, enum mlxfw_fsm_state_err *fsm_state_err); @@ -58,16 +96,6 @@ struct mlxfw_dev_ops { void (*fsm_cancel)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); void (*fsm_release)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); - - void (*status_notify)(struct mlxfw_dev *mlxfw_dev, - const char *msg, const char *comp_name, - u32 done_bytes, u32 total_bytes); -}; - -struct mlxfw_dev { - const struct mlxfw_dev_ops *ops; - const char *psid; - u16 psid_size; }; #if IS_REACHABLE(CONFIG_MLXFW) diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c index 29e95d0a6ad1..046a0cb82ed8 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c @@ -16,38 +16,70 @@ (MLXFW_FSM_STATE_WAIT_TIMEOUT_MS / MLXFW_FSM_STATE_WAIT_CYCLE_MS) #define MLXFW_FSM_MAX_COMPONENT_SIZE (10 * (1 << 20)) -static const char * const mlxfw_fsm_state_err_str[] = { - [MLXFW_FSM_STATE_ERR_ERROR] = - "general error", - [MLXFW_FSM_STATE_ERR_REJECTED_DIGEST_ERR] = - "component hash mismatch", - [MLXFW_FSM_STATE_ERR_REJECTED_NOT_APPLICABLE] = - "component not applicable", - [MLXFW_FSM_STATE_ERR_REJECTED_UNKNOWN_KEY] = - "unknown key", - [MLXFW_FSM_STATE_ERR_REJECTED_AUTH_FAILED] = - "authentication failed", - [MLXFW_FSM_STATE_ERR_REJECTED_UNSIGNED] = - "component was not signed", - [MLXFW_FSM_STATE_ERR_REJECTED_KEY_NOT_APPLICABLE] = - "key not applicable", - [MLXFW_FSM_STATE_ERR_REJECTED_BAD_FORMAT] = - "bad format", - [MLXFW_FSM_STATE_ERR_BLOCKED_PENDING_RESET] = - "pending reset", - [MLXFW_FSM_STATE_ERR_MAX] = - "unknown error" +static const int mlxfw_fsm_state_errno[] = { + [MLXFW_FSM_STATE_ERR_ERROR] = -EIO, + [MLXFW_FSM_STATE_ERR_REJECTED_DIGEST_ERR] = -EBADMSG, + [MLXFW_FSM_STATE_ERR_REJECTED_NOT_APPLICABLE] = -ENOENT, + [MLXFW_FSM_STATE_ERR_REJECTED_UNKNOWN_KEY] = -ENOKEY, + 
[MLXFW_FSM_STATE_ERR_REJECTED_AUTH_FAILED] = -EACCES, + [MLXFW_FSM_STATE_ERR_REJECTED_UNSIGNED] = -EKEYREVOKED, + [MLXFW_FSM_STATE_ERR_REJECTED_KEY_NOT_APPLICABLE] = -EKEYREJECTED, + [MLXFW_FSM_STATE_ERR_REJECTED_BAD_FORMAT] = -ENOEXEC, + [MLXFW_FSM_STATE_ERR_BLOCKED_PENDING_RESET] = -EBUSY, + [MLXFW_FSM_STATE_ERR_MAX] = -EINVAL }; -static void mlxfw_status_notify(struct mlxfw_dev *mlxfw_dev, - const char *msg, const char *comp_name, - u32 done_bytes, u32 total_bytes) +#define MLXFW_ERR_PRFX "Firmware flash failed: " +#define MLXFW_ERR_MSG(fwdev, extack, msg, err) do { \ + mlxfw_err(fwdev, "%s, err (%d)\n", MLXFW_ERR_PRFX msg, err); \ + NL_SET_ERR_MSG_MOD(extack, MLXFW_ERR_PRFX msg); \ +} while (0) + +static int mlxfw_fsm_state_err(struct mlxfw_dev *mlxfw_dev, + struct netlink_ext_ack *extack, + enum mlxfw_fsm_state_err err) { - if (!mlxfw_dev->ops->status_notify) - return; - mlxfw_dev->ops->status_notify(mlxfw_dev, msg, comp_name, - done_bytes, total_bytes); -} + enum mlxfw_fsm_state_err fsm_state_err; + + fsm_state_err = min_t(enum mlxfw_fsm_state_err, err, + MLXFW_FSM_STATE_ERR_MAX); + + switch (fsm_state_err) { + case MLXFW_FSM_STATE_ERR_ERROR: + MLXFW_ERR_MSG(mlxfw_dev, extack, "general error", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_DIGEST_ERR: + MLXFW_ERR_MSG(mlxfw_dev, extack, "component hash mismatch", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_NOT_APPLICABLE: + MLXFW_ERR_MSG(mlxfw_dev, extack, "component not applicable", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_UNKNOWN_KEY: + MLXFW_ERR_MSG(mlxfw_dev, extack, "unknown key", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_AUTH_FAILED: + MLXFW_ERR_MSG(mlxfw_dev, extack, "authentication failed", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_UNSIGNED: + MLXFW_ERR_MSG(mlxfw_dev, extack, "component was not signed", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_KEY_NOT_APPLICABLE: + MLXFW_ERR_MSG(mlxfw_dev, extack, "key not applicable", err); + break; + case 
MLXFW_FSM_STATE_ERR_REJECTED_BAD_FORMAT: + MLXFW_ERR_MSG(mlxfw_dev, extack, "bad format", err); + break; + case MLXFW_FSM_STATE_ERR_BLOCKED_PENDING_RESET: + MLXFW_ERR_MSG(mlxfw_dev, extack, "pending reset", err); + break; + case MLXFW_FSM_STATE_ERR_OK: /* fall through */ + case MLXFW_FSM_STATE_ERR_MAX: + MLXFW_ERR_MSG(mlxfw_dev, extack, "unknown error", err); + break; + }; + + return mlxfw_fsm_state_errno[fsm_state_err]; +}; static int mlxfw_fsm_state_wait(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, enum mlxfw_fsm_state fsm_state, @@ -62,21 +94,18 @@ static int mlxfw_fsm_state_wait(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, retry: err = mlxfw_dev->ops->fsm_query_state(mlxfw_dev, fwhandle, &curr_fsm_state, &fsm_state_err); - if (err) + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, "FSM state query failed", err); return err; - - if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) { - fsm_state_err = min_t(enum mlxfw_fsm_state_err, - fsm_state_err, MLXFW_FSM_STATE_ERR_MAX); - pr_err("Firmware flash failed: %s\n", - mlxfw_fsm_state_err_str[fsm_state_err]); - NL_SET_ERR_MSG_MOD(extack, "Firmware flash failed"); - return -EINVAL; } + + if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) + return mlxfw_fsm_state_err(mlxfw_dev, extack, fsm_state_err); + if (curr_fsm_state != fsm_state) { if (--times == 0) { - pr_err("Timeout reached on FSM state change"); - NL_SET_ERR_MSG_MOD(extack, "Timeout reached on FSM state change"); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Timeout reached on FSM state change", -ETIMEDOUT); return -ETIMEDOUT; } msleep(MLXFW_FSM_STATE_WAIT_CYCLE_MS); @@ -85,6 +114,92 @@ retry: return 0; } +static int +mlxfw_fsm_reactivate_err(struct mlxfw_dev *mlxfw_dev, + struct netlink_ext_ack *extack, u8 err) +{ + enum mlxfw_fsm_reactivate_status status; + +#define MXFW_REACT_PRFX "Reactivate FSM: " +#define MLXFW_REACT_ERR(msg, err) \ + MLXFW_ERR_MSG(mlxfw_dev, extack, MXFW_REACT_PRFX msg, err) + + status = min_t(enum mlxfw_fsm_reactivate_status, err, + 
MLXFW_FSM_REACTIVATE_STATUS_MAX); + + switch (status) { + case MLXFW_FSM_REACTIVATE_STATUS_BUSY: + MLXFW_REACT_ERR("busy", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_PROHIBITED_FW_VER_ERR: + MLXFW_REACT_ERR("prohibited fw ver", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_COPY_FAILED: + MLXFW_REACT_ERR("first page copy failed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_ERASE_FAILED: + MLXFW_REACT_ERR("first page erase failed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_RESTORE_FAILED: + MLXFW_REACT_ERR("first page restore failed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_CANDIDATE_FW_DEACTIVATION_FAILED: + MLXFW_REACT_ERR("candidate fw deactivation failed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_ERR_DEVICE_RESET_REQUIRED: + MLXFW_REACT_ERR("device reset required", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_ERR_FW_PROGRAMMING_NEEDED: + MLXFW_REACT_ERR("fw programming needed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_FW_ALREADY_ACTIVATED: + MLXFW_REACT_ERR("fw already activated", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_OK: /* fall through */ + case MLXFW_FSM_REACTIVATE_STATUS_MAX: + MLXFW_REACT_ERR("unexpected error", err); + break; + }; + return -EREMOTEIO; +}; + +static int mlxfw_fsm_reactivate(struct mlxfw_dev *mlxfw_dev, + struct netlink_ext_ack *extack, + bool *supported) +{ + u8 status; + int err; + + if (!mlxfw_dev->ops->fsm_reactivate) + return 0; + + err = mlxfw_dev->ops->fsm_reactivate(mlxfw_dev, &status); + if (err == -EOPNOTSUPP) { + *supported = false; + return 0; + } + + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Could not reactivate firmware flash", err); + return err; + } + + if (status == MLXFW_FSM_REACTIVATE_STATUS_OK || + status == MLXFW_FSM_REACTIVATE_STATUS_FW_ALREADY_ACTIVATED) + return 0; + + return mlxfw_fsm_reactivate_err(mlxfw_dev, extack, status); +} + +static void mlxfw_status_notify(struct mlxfw_dev *mlxfw_dev, + const char 
*msg, const char *comp_name, + u32 done_bytes, u32 total_bytes) +{ + devlink_flash_update_status_notify(mlxfw_dev->devlink, msg, comp_name, + done_bytes, total_bytes); +} + #define MLXFW_ALIGN_DOWN(x, align_bits) ((x) & ~((1 << (align_bits)) - 1)) #define MLXFW_ALIGN_UP(x, align_bits) \ MLXFW_ALIGN_DOWN((x) + ((1 << (align_bits)) - 1), (align_bits)) @@ -92,6 +207,7 @@ retry: static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, struct mlxfw_mfa2_component *comp, + bool reactivate_supp, struct netlink_ext_ack *extack) { u16 comp_max_write_size; @@ -108,34 +224,43 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, err = mlxfw_dev->ops->component_query(mlxfw_dev, comp->index, &comp_max_size, &comp_align_bits, &comp_max_write_size); - if (err) + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, "FSM component query failed", err); return err; + } comp_max_size = min_t(u32, comp_max_size, MLXFW_FSM_MAX_COMPONENT_SIZE); if (comp->data_size > comp_max_size) { - pr_err("Component %d is of size %d which is bigger than limit %d\n", - comp->index, comp->data_size, comp_max_size); - NL_SET_ERR_MSG_MOD(extack, "Component is bigger than limit"); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Component size is bigger than limit", -EINVAL); return -EINVAL; } comp_max_write_size = MLXFW_ALIGN_DOWN(comp_max_write_size, comp_align_bits); - pr_debug("Component update\n"); + mlxfw_dbg(mlxfw_dev, "Component update\n"); mlxfw_status_notify(mlxfw_dev, "Updating component", comp_name, 0, 0); err = mlxfw_dev->ops->fsm_component_update(mlxfw_dev, fwhandle, comp->index, comp->data_size); - if (err) + if (err) { + if (!reactivate_supp) + MLXFW_ERR_MSG(mlxfw_dev, extack, + "FSM component update failed, FW reactivate is not supported", + err); + else + MLXFW_ERR_MSG(mlxfw_dev, extack, + "FSM component update failed", err); return err; + } err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_DOWNLOAD, extack); if (err) goto err_out; - pr_debug("Component 
download\n"); + mlxfw_dbg(mlxfw_dev, "Component download\n"); mlxfw_status_notify(mlxfw_dev, "Downloading component", comp_name, 0, comp->data_size); for (offset = 0; @@ -147,19 +272,25 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, err = mlxfw_dev->ops->fsm_block_download(mlxfw_dev, fwhandle, block_ptr, block_size, offset); - if (err) + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Component download failed", err); goto err_out; + } mlxfw_status_notify(mlxfw_dev, "Downloading component", comp_name, offset + block_size, comp->data_size); } - pr_debug("Component verify\n"); + mlxfw_dbg(mlxfw_dev, "Component verify\n"); mlxfw_status_notify(mlxfw_dev, "Verifying component", comp_name, 0, 0); err = mlxfw_dev->ops->fsm_component_verify(mlxfw_dev, fwhandle, comp->index); - if (err) + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, + "FSM component verify failed", err); goto err_out; + } err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED, extack); @@ -174,6 +305,7 @@ err_out: static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, struct mlxfw_mfa2_file *mfa2_file, + bool reactivate_supp, struct netlink_ext_ack *extack) { u32 component_count; @@ -184,8 +316,8 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, mlxfw_dev->psid_size, &component_count); if (err) { - pr_err("Could not find device PSID in MFA2 file\n"); - NL_SET_ERR_MSG_MOD(extack, "Could not find device PSID in MFA2 file"); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Could not find device PSID in MFA2 file", err); return err; } @@ -194,11 +326,17 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, comp = mlxfw_mfa2_file_component_get(mfa2_file, mlxfw_dev->psid, mlxfw_dev->psid_size, i); - if (IS_ERR(comp)) - return PTR_ERR(comp); + if (IS_ERR(comp)) { + err = PTR_ERR(comp); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Failed to get MFA2 component", err); + return err; + } - pr_info("Flashing component type 
%d\n", comp->index); - err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp, extack); + mlxfw_info(mlxfw_dev, "Flashing component type %d\n", + comp->index); + err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp, + reactivate_supp, extack); mlxfw_mfa2_file_component_put(comp); if (err) return err; @@ -211,26 +349,32 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, struct netlink_ext_ack *extack) { struct mlxfw_mfa2_file *mfa2_file; + bool reactivate_supp = true; u32 fwhandle; int err; if (!mlxfw_mfa2_check(firmware)) { - pr_err("Firmware file is not MFA2\n"); - NL_SET_ERR_MSG_MOD(extack, "Firmware file is not MFA2"); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Firmware file is not MFA2", -EINVAL); return -EINVAL; } mfa2_file = mlxfw_mfa2_file_init(firmware); - if (IS_ERR(mfa2_file)) - return PTR_ERR(mfa2_file); + if (IS_ERR(mfa2_file)) { + err = PTR_ERR(mfa2_file); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Failed to initialize MFA2 firmware file", err); + return err; + } - pr_info("Initialize firmware flash process\n"); + mlxfw_info(mlxfw_dev, "Initialize firmware flash process\n"); + devlink_flash_update_begin_notify(mlxfw_dev->devlink); mlxfw_status_notify(mlxfw_dev, "Initializing firmware flash process", NULL, 0, 0); err = mlxfw_dev->ops->fsm_lock(mlxfw_dev, &fwhandle); if (err) { - pr_err("Could not lock the firmware FSM\n"); - NL_SET_ERR_MSG_MOD(extack, "Could not lock the firmware FSM"); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Could not lock the firmware FSM", err); goto err_fsm_lock; } @@ -239,16 +383,26 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, if (err) goto err_state_wait_idle_to_locked; - err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file, extack); + err = mlxfw_fsm_reactivate(mlxfw_dev, extack, &reactivate_supp); + if (err) + goto err_fsm_reactivate; + + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_LOCKED, extack); + if (err) + goto err_state_wait_reactivate_to_locked; + + err = 
mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file, + reactivate_supp, extack); if (err) goto err_flash_components; - pr_debug("Activate image\n"); + mlxfw_dbg(mlxfw_dev, "Activate image\n"); mlxfw_status_notify(mlxfw_dev, "Activating image", NULL, 0, 0); err = mlxfw_dev->ops->fsm_activate(mlxfw_dev, fwhandle); if (err) { - pr_err("Could not activate the downloaded image\n"); - NL_SET_ERR_MSG_MOD(extack, "Could not activate the downloaded image"); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Could not activate the downloaded image", err); goto err_fsm_activate; } @@ -257,21 +411,25 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, if (err) goto err_state_wait_activate_to_locked; - pr_debug("Handle release\n"); + mlxfw_dbg(mlxfw_dev, "Handle release\n"); mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle); - pr_info("Firmware flash done.\n"); + mlxfw_info(mlxfw_dev, "Firmware flash done\n"); mlxfw_status_notify(mlxfw_dev, "Firmware flash done", NULL, 0, 0); mlxfw_mfa2_file_fini(mfa2_file); + devlink_flash_update_end_notify(mlxfw_dev->devlink); return 0; err_state_wait_activate_to_locked: err_fsm_activate: err_flash_components: +err_state_wait_reactivate_to_locked: +err_fsm_reactivate: err_state_wait_idle_to_locked: mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle); err_fsm_lock: mlxfw_mfa2_file_fini(mfa2_file); + devlink_flash_update_end_notify(mlxfw_dev->devlink); return err; } EXPORT_SYMBOL(mlxfw_firmware_flash); diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c index 79057af4fe99..5d9ddf36fb4e 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c @@ -496,7 +496,7 @@ mlxfw_mfa2_file_component_tlv_get(const struct mlxfw_mfa2_file *mfa2_file, struct mlxfw_mfa2_comp_data { struct mlxfw_mfa2_component comp; - u8 buff[0]; + u8 buff[]; }; static const struct mlxfw_mfa2_tlv_component_descriptor * diff --git 
a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h index 33c971190bba..2014a5de5a01 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h @@ -11,7 +11,7 @@ struct mlxfw_mfa2_tlv { u8 version; u8 type; __be16 len; - u8 data[0]; + u8 data[]; } __packed; static inline const struct mlxfw_mfa2_tlv * diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index e9f791c43f20..e9ccd333f61d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -82,7 +82,7 @@ struct mlxsw_core { struct mlxsw_core_port *ports; unsigned int max_ports; bool fw_flash_in_progress; - unsigned long driver_priv[0]; + unsigned long driver_priv[]; /* driver_priv has to be always the last item */ }; @@ -142,6 +142,7 @@ struct mlxsw_rx_listener_item { struct list_head list; struct mlxsw_rx_listener rxl; void *priv; + bool enabled; }; struct mlxsw_event_listener_item { @@ -1197,6 +1198,72 @@ mlxsw_devlink_trap_group_init(struct devlink *devlink, return mlxsw_driver->trap_group_init(mlxsw_core, group); } +static int +mlxsw_devlink_trap_group_set(struct devlink *devlink, + const struct devlink_trap_group *group, + const struct devlink_trap_policer *policer) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_group_set) + return -EOPNOTSUPP; + return mlxsw_driver->trap_group_set(mlxsw_core, group, policer); +} + +static int +mlxsw_devlink_trap_policer_init(struct devlink *devlink, + const struct devlink_trap_policer *policer) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_policer_init) + return -EOPNOTSUPP; + return mlxsw_driver->trap_policer_init(mlxsw_core, policer); +} + +static void 
+mlxsw_devlink_trap_policer_fini(struct devlink *devlink, + const struct devlink_trap_policer *policer) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_policer_fini) + return; + mlxsw_driver->trap_policer_fini(mlxsw_core, policer); +} + +static int +mlxsw_devlink_trap_policer_set(struct devlink *devlink, + const struct devlink_trap_policer *policer, + u64 rate, u64 burst, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_policer_set) + return -EOPNOTSUPP; + return mlxsw_driver->trap_policer_set(mlxsw_core, policer, rate, burst, + extack); +} + +static int +mlxsw_devlink_trap_policer_counter_get(struct devlink *devlink, + const struct devlink_trap_policer *policer, + u64 *p_drops) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_policer_counter_get) + return -EOPNOTSUPP; + return mlxsw_driver->trap_policer_counter_get(mlxsw_core, policer, + p_drops); +} + static const struct devlink_ops mlxsw_devlink_ops = { .reload_down = mlxsw_devlink_core_bus_device_reload_down, .reload_up = mlxsw_devlink_core_bus_device_reload_up, @@ -1219,6 +1286,11 @@ static const struct devlink_ops mlxsw_devlink_ops = { .trap_fini = mlxsw_devlink_trap_fini, .trap_action_set = mlxsw_devlink_trap_action_set, .trap_group_init = mlxsw_devlink_trap_group_init, + .trap_group_set = mlxsw_devlink_trap_group_set, + .trap_policer_init = mlxsw_devlink_trap_policer_init, + .trap_policer_fini = mlxsw_devlink_trap_policer_fini, + .trap_policer_set = mlxsw_devlink_trap_policer_set, + .trap_policer_counter_get = mlxsw_devlink_trap_policer_counter_get, }; static int @@ -1457,14 +1529,12 @@ static bool __is_rx_listener_equal(const struct mlxsw_rx_listener *rxl_a, static struct 
mlxsw_rx_listener_item * __find_rx_listener_item(struct mlxsw_core *mlxsw_core, - const struct mlxsw_rx_listener *rxl, - void *priv) + const struct mlxsw_rx_listener *rxl) { struct mlxsw_rx_listener_item *rxl_item; list_for_each_entry(rxl_item, &mlxsw_core->rx_listener_list, list) { - if (__is_rx_listener_equal(&rxl_item->rxl, rxl) && - rxl_item->priv == priv) + if (__is_rx_listener_equal(&rxl_item->rxl, rxl)) return rxl_item; } return NULL; @@ -1472,11 +1542,11 @@ __find_rx_listener_item(struct mlxsw_core *mlxsw_core, int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core, const struct mlxsw_rx_listener *rxl, - void *priv) + void *priv, bool enabled) { struct mlxsw_rx_listener_item *rxl_item; - rxl_item = __find_rx_listener_item(mlxsw_core, rxl, priv); + rxl_item = __find_rx_listener_item(mlxsw_core, rxl); if (rxl_item) return -EEXIST; rxl_item = kmalloc(sizeof(*rxl_item), GFP_KERNEL); @@ -1484,6 +1554,7 @@ int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core, return -ENOMEM; rxl_item->rxl = *rxl; rxl_item->priv = priv; + rxl_item->enabled = enabled; list_add_rcu(&rxl_item->list, &mlxsw_core->rx_listener_list); return 0; @@ -1491,12 +1562,11 @@ int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core, EXPORT_SYMBOL(mlxsw_core_rx_listener_register); void mlxsw_core_rx_listener_unregister(struct mlxsw_core *mlxsw_core, - const struct mlxsw_rx_listener *rxl, - void *priv) + const struct mlxsw_rx_listener *rxl) { struct mlxsw_rx_listener_item *rxl_item; - rxl_item = __find_rx_listener_item(mlxsw_core, rxl, priv); + rxl_item = __find_rx_listener_item(mlxsw_core, rxl); if (!rxl_item) return; list_del_rcu(&rxl_item->list); @@ -1505,6 +1575,19 @@ void mlxsw_core_rx_listener_unregister(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_rx_listener_unregister); +static void +mlxsw_core_rx_listener_state_set(struct mlxsw_core *mlxsw_core, + const struct mlxsw_rx_listener *rxl, + bool enabled) +{ + struct mlxsw_rx_listener_item 
*rxl_item; + + rxl_item = __find_rx_listener_item(mlxsw_core, rxl); + if (WARN_ON(!rxl_item)) + return; + rxl_item->enabled = enabled; +} + static void mlxsw_core_event_listener_func(struct sk_buff *skb, u8 local_port, void *priv) { @@ -1534,14 +1617,12 @@ static bool __is_event_listener_equal(const struct mlxsw_event_listener *el_a, static struct mlxsw_event_listener_item * __find_event_listener_item(struct mlxsw_core *mlxsw_core, - const struct mlxsw_event_listener *el, - void *priv) + const struct mlxsw_event_listener *el) { struct mlxsw_event_listener_item *el_item; list_for_each_entry(el_item, &mlxsw_core->event_listener_list, list) { - if (__is_event_listener_equal(&el_item->el, el) && - el_item->priv == priv) + if (__is_event_listener_equal(&el_item->el, el)) return el_item; } return NULL; @@ -1559,7 +1640,7 @@ int mlxsw_core_event_listener_register(struct mlxsw_core *mlxsw_core, .trap_id = el->trap_id, }; - el_item = __find_event_listener_item(mlxsw_core, el, priv); + el_item = __find_event_listener_item(mlxsw_core, el); if (el_item) return -EEXIST; el_item = kmalloc(sizeof(*el_item), GFP_KERNEL); @@ -1568,7 +1649,7 @@ int mlxsw_core_event_listener_register(struct mlxsw_core *mlxsw_core, el_item->el = *el; el_item->priv = priv; - err = mlxsw_core_rx_listener_register(mlxsw_core, &rxl, el_item); + err = mlxsw_core_rx_listener_register(mlxsw_core, &rxl, el_item, true); if (err) goto err_rx_listener_register; @@ -1586,8 +1667,7 @@ err_rx_listener_register: EXPORT_SYMBOL(mlxsw_core_event_listener_register); void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, - const struct mlxsw_event_listener *el, - void *priv) + const struct mlxsw_event_listener *el) { struct mlxsw_event_listener_item *el_item; const struct mlxsw_rx_listener rxl = { @@ -1596,10 +1676,10 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, .trap_id = el->trap_id, }; - el_item = __find_event_listener_item(mlxsw_core, el, priv); + el_item = 
__find_event_listener_item(mlxsw_core, el); if (!el_item) return; - mlxsw_core_rx_listener_unregister(mlxsw_core, &rxl, el_item); + mlxsw_core_rx_listener_unregister(mlxsw_core, &rxl); list_del(&el_item->list); kfree(el_item); } @@ -1607,16 +1687,18 @@ EXPORT_SYMBOL(mlxsw_core_event_listener_unregister); static int mlxsw_core_listener_register(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, - void *priv) + void *priv, bool enabled) { - if (listener->is_event) + if (listener->is_event) { + WARN_ON(!enabled); return mlxsw_core_event_listener_register(mlxsw_core, - &listener->u.event_listener, + &listener->event_listener, priv); - else + } else { return mlxsw_core_rx_listener_register(mlxsw_core, - &listener->u.rx_listener, - priv); + &listener->rx_listener, + priv, enabled); + } } static void mlxsw_core_listener_unregister(struct mlxsw_core *mlxsw_core, @@ -1625,26 +1707,31 @@ static void mlxsw_core_listener_unregister(struct mlxsw_core *mlxsw_core, { if (listener->is_event) mlxsw_core_event_listener_unregister(mlxsw_core, - &listener->u.event_listener, - priv); + &listener->event_listener); else mlxsw_core_rx_listener_unregister(mlxsw_core, - &listener->u.rx_listener, - priv); + &listener->rx_listener); } int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, void *priv) { + enum mlxsw_reg_htgt_trap_group trap_group; + enum mlxsw_reg_hpkt_action action; char hpkt_pl[MLXSW_REG_HPKT_LEN]; int err; - err = mlxsw_core_listener_register(mlxsw_core, listener, priv); + err = mlxsw_core_listener_register(mlxsw_core, listener, priv, + listener->enabled_on_register); if (err) return err; - mlxsw_reg_hpkt_pack(hpkt_pl, listener->action, listener->trap_id, - listener->trap_group, listener->is_ctrl); + action = listener->enabled_on_register ? listener->en_action : + listener->dis_action; + trap_group = listener->enabled_on_register ? 
listener->en_trap_group : + listener->dis_trap_group; + mlxsw_reg_hpkt_pack(hpkt_pl, action, listener->trap_id, + trap_group, listener->is_ctrl); err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); if (err) goto err_trap_set; @@ -1664,8 +1751,8 @@ void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, char hpkt_pl[MLXSW_REG_HPKT_LEN]; if (!listener->is_event) { - mlxsw_reg_hpkt_pack(hpkt_pl, listener->unreg_action, - listener->trap_id, listener->trap_group, + mlxsw_reg_hpkt_pack(hpkt_pl, listener->dis_action, + listener->trap_id, listener->dis_trap_group, listener->is_ctrl); mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); } @@ -1674,17 +1761,33 @@ void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_trap_unregister); -int mlxsw_core_trap_action_set(struct mlxsw_core *mlxsw_core, - const struct mlxsw_listener *listener, - enum mlxsw_reg_hpkt_action action) +int mlxsw_core_trap_state_set(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + bool enabled) { + enum mlxsw_reg_htgt_trap_group trap_group; + enum mlxsw_reg_hpkt_action action; char hpkt_pl[MLXSW_REG_HPKT_LEN]; + int err; + + /* Not supported for event listener */ + if (WARN_ON(listener->is_event)) + return -EINVAL; + action = enabled ? listener->en_action : listener->dis_action; + trap_group = enabled ? 
listener->en_trap_group : + listener->dis_trap_group; mlxsw_reg_hpkt_pack(hpkt_pl, action, listener->trap_id, - listener->trap_group, listener->is_ctrl); - return mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); + trap_group, listener->is_ctrl); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); + if (err) + return err; + + mlxsw_core_rx_listener_state_set(mlxsw_core, &listener->rx_listener, + enabled); + return 0; } -EXPORT_SYMBOL(mlxsw_core_trap_action_set); +EXPORT_SYMBOL(mlxsw_core_trap_state_set); static u64 mlxsw_core_tid_get(struct mlxsw_core *mlxsw_core) { @@ -1942,7 +2045,8 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, if ((rxl->local_port == MLXSW_PORT_DONT_CARE || rxl->local_port == local_port) && rxl->trap_id == rx_info->trap_id) { - found = true; + if (rxl_item->enabled) + found = true; break; } } @@ -2168,13 +2272,22 @@ int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module) /* Here we need to get the module width according to the module type. 
*/ switch (module_type) { + case MLXSW_REG_PMTM_MODULE_TYPE_C2C8X: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_QSFP_DD: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_OSFP: + return 8; + case MLXSW_REG_PMTM_MODULE_TYPE_C2C4X: /* fall through */ case MLXSW_REG_PMTM_MODULE_TYPE_BP_4X: /* fall through */ - case MLXSW_REG_PMTM_MODULE_TYPE_BP_QSFP: + case MLXSW_REG_PMTM_MODULE_TYPE_QSFP: return 4; - case MLXSW_REG_PMTM_MODULE_TYPE_BP_2X: + case MLXSW_REG_PMTM_MODULE_TYPE_C2C2X: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_BP_2X: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_SFP_DD: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_DSFP: return 2; - case MLXSW_REG_PMTM_MODULE_TYPE_BP_SFP: /* fall through */ - case MLXSW_REG_PMTM_MODULE_TYPE_BP_1X: + case MLXSW_REG_PMTM_MODULE_TYPE_C2C1X: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_BP_1X: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_SFP: return 1; default: return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 543476a2e503..22b0dfa7cfae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -62,7 +62,6 @@ struct mlxsw_rx_listener { void (*func)(struct sk_buff *skb, u8 local_port, void *priv); u8 local_port; u16 trap_id; - enum mlxsw_reg_hpkt_action action; }; struct mlxsw_event_listener { @@ -76,58 +75,71 @@ struct mlxsw_listener { union { struct mlxsw_rx_listener rx_listener; struct mlxsw_event_listener event_listener; - } u; - enum mlxsw_reg_hpkt_action action; - enum mlxsw_reg_hpkt_action unreg_action; - u8 trap_group; - bool is_ctrl; /* should go via control buffer or not */ - bool is_event; + }; + enum mlxsw_reg_hpkt_action en_action; /* Action when enabled */ + enum mlxsw_reg_hpkt_action dis_action; /* Action when disabled */ + u8 en_trap_group; /* Trap group when enabled */ + u8 dis_trap_group; /* Trap group when disabled */ + u8 is_ctrl:1, /* 
should go via control buffer or not */ + is_event:1, + enabled_on_register:1; /* Trap should be enabled when listener + * is registered. + */ }; -#define MLXSW_RXL(_func, _trap_id, _action, _is_ctrl, _trap_group, \ - _unreg_action) \ - { \ - .trap_id = MLXSW_TRAP_ID_##_trap_id, \ - .u.rx_listener = \ - { \ - .func = _func, \ - .local_port = MLXSW_PORT_DONT_CARE, \ - .trap_id = MLXSW_TRAP_ID_##_trap_id, \ - }, \ - .action = MLXSW_REG_HPKT_ACTION_##_action, \ - .unreg_action = MLXSW_REG_HPKT_ACTION_##_unreg_action, \ - .trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_trap_group, \ - .is_ctrl = _is_ctrl, \ - .is_event = false, \ +#define __MLXSW_RXL(_func, _trap_id, _en_action, _is_ctrl, _en_trap_group, \ + _dis_action, _enabled_on_register, _dis_trap_group) \ + { \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + .rx_listener = \ + { \ + .func = _func, \ + .local_port = MLXSW_PORT_DONT_CARE, \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + }, \ + .en_action = MLXSW_REG_HPKT_ACTION_##_en_action, \ + .dis_action = MLXSW_REG_HPKT_ACTION_##_dis_action, \ + .en_trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_en_trap_group, \ + .dis_trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_dis_trap_group, \ + .is_ctrl = _is_ctrl, \ + .enabled_on_register = _enabled_on_register, \ } -#define MLXSW_EVENTL(_func, _trap_id, _trap_group) \ - { \ - .trap_id = MLXSW_TRAP_ID_##_trap_id, \ - .u.event_listener = \ - { \ - .func = _func, \ - .trap_id = MLXSW_TRAP_ID_##_trap_id, \ - }, \ - .action = MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU, \ - .trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_trap_group, \ - .is_ctrl = false, \ - .is_event = true, \ +#define MLXSW_RXL(_func, _trap_id, _en_action, _is_ctrl, _trap_group, \ + _dis_action) \ + __MLXSW_RXL(_func, _trap_id, _en_action, _is_ctrl, _trap_group, \ + _dis_action, true, _trap_group) + +#define MLXSW_RXL_DIS(_func, _trap_id, _en_action, _is_ctrl, _en_trap_group, \ + _dis_action, _dis_trap_group) \ + __MLXSW_RXL(_func, _trap_id, _en_action, _is_ctrl, _en_trap_group, \ + 
_dis_action, false, _dis_trap_group) + +#define MLXSW_EVENTL(_func, _trap_id, _trap_group) \ + { \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + .event_listener = \ + { \ + .func = _func, \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + }, \ + .en_action = MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU, \ + .en_trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_trap_group, \ + .is_event = true, \ + .enabled_on_register = true, \ } int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core, const struct mlxsw_rx_listener *rxl, - void *priv); + void *priv, bool enabled); void mlxsw_core_rx_listener_unregister(struct mlxsw_core *mlxsw_core, - const struct mlxsw_rx_listener *rxl, - void *priv); + const struct mlxsw_rx_listener *rxl); int mlxsw_core_event_listener_register(struct mlxsw_core *mlxsw_core, const struct mlxsw_event_listener *el, void *priv); void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, - const struct mlxsw_event_listener *el, - void *priv); + const struct mlxsw_event_listener *el); int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, @@ -135,9 +147,9 @@ int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core, void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, void *priv); -int mlxsw_core_trap_action_set(struct mlxsw_core *mlxsw_core, - const struct mlxsw_listener *listener, - enum mlxsw_reg_hpkt_action action); +int mlxsw_core_trap_state_set(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + bool enabled); typedef void mlxsw_reg_trans_cb_t(struct mlxsw_core *mlxsw_core, char *payload, size_t payload_len, unsigned long cb_priv); @@ -315,6 +327,20 @@ struct mlxsw_driver { enum devlink_trap_action action); int (*trap_group_init)(struct mlxsw_core *mlxsw_core, const struct devlink_trap_group *group); + int (*trap_group_set)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group, + const struct devlink_trap_policer 
*policer); + int (*trap_policer_init)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer); + void (*trap_policer_fini)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer); + int (*trap_policer_set)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 rate, u64 burst, + struct netlink_ext_ack *extack); + int (*trap_policer_counter_get)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 *p_drops); void (*txhdr_construct)(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); int (*resources_register)(struct mlxsw_core *mlxsw_core); @@ -461,7 +487,10 @@ enum mlxsw_devlink_param_id { }; struct mlxsw_skb_cb { - struct mlxsw_tx_info tx_info; + union { + struct mlxsw_tx_info tx_info; + u32 cookie_index; /* Only used during receive */ + }; }; static inline struct mlxsw_skb_cb *mlxsw_skb_cb(struct sk_buff *skb) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index c51b2adfc1e1..70a104e728f6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -7,6 +7,9 @@ #include <linux/errno.h> #include <linux/rhashtable.h> #include <linux/list.h> +#include <linux/idr.h> +#include <linux/refcount.h> +#include <net/flow_offload.h> #include "item.h" #include "trap.h" @@ -63,6 +66,8 @@ struct mlxsw_afa { void *ops_priv; struct rhashtable set_ht; struct rhashtable fwd_entry_ht; + struct rhashtable cookie_ht; + struct idr cookie_idr; }; #define MLXSW_AFA_SET_LEN 0xA8 @@ -121,6 +126,55 @@ static const struct rhashtable_params mlxsw_afa_fwd_entry_ht_params = { .automatic_shrinking = true, }; +struct mlxsw_afa_cookie { + struct rhash_head ht_node; + refcount_t ref_count; + struct rcu_head rcu; + u32 cookie_index; + struct flow_action_cookie fa_cookie; +}; + +static u32 mlxsw_afa_cookie_hash(const struct 
flow_action_cookie *fa_cookie, + u32 seed) +{ + return jhash2((u32 *) fa_cookie->cookie, + fa_cookie->cookie_len / sizeof(u32), seed); +} + +static u32 mlxsw_afa_cookie_key_hashfn(const void *data, u32 len, u32 seed) +{ + const struct flow_action_cookie *fa_cookie = data; + + return mlxsw_afa_cookie_hash(fa_cookie, seed); +} + +static u32 mlxsw_afa_cookie_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct mlxsw_afa_cookie *cookie = data; + + return mlxsw_afa_cookie_hash(&cookie->fa_cookie, seed); +} + +static int mlxsw_afa_cookie_obj_cmpfn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct flow_action_cookie *fa_cookie = arg->key; + const struct mlxsw_afa_cookie *cookie = obj; + + if (cookie->fa_cookie.cookie_len == fa_cookie->cookie_len) + return memcmp(cookie->fa_cookie.cookie, fa_cookie->cookie, + fa_cookie->cookie_len); + return 1; +} + +static const struct rhashtable_params mlxsw_afa_cookie_ht_params = { + .head_offset = offsetof(struct mlxsw_afa_cookie, ht_node), + .hashfn = mlxsw_afa_cookie_key_hashfn, + .obj_hashfn = mlxsw_afa_cookie_obj_hashfn, + .obj_cmpfn = mlxsw_afa_cookie_obj_cmpfn, + .automatic_shrinking = true, +}; + struct mlxsw_afa *mlxsw_afa_create(unsigned int max_acts_per_set, const struct mlxsw_afa_ops *ops, void *ops_priv) @@ -138,11 +192,18 @@ struct mlxsw_afa *mlxsw_afa_create(unsigned int max_acts_per_set, &mlxsw_afa_fwd_entry_ht_params); if (err) goto err_fwd_entry_rhashtable_init; + err = rhashtable_init(&mlxsw_afa->cookie_ht, + &mlxsw_afa_cookie_ht_params); + if (err) + goto err_cookie_rhashtable_init; + idr_init(&mlxsw_afa->cookie_idr); mlxsw_afa->max_acts_per_set = max_acts_per_set; mlxsw_afa->ops = ops; mlxsw_afa->ops_priv = ops_priv; return mlxsw_afa; +err_cookie_rhashtable_init: + rhashtable_destroy(&mlxsw_afa->fwd_entry_ht); err_fwd_entry_rhashtable_init: rhashtable_destroy(&mlxsw_afa->set_ht); err_set_rhashtable_init: @@ -153,6 +214,9 @@ EXPORT_SYMBOL(mlxsw_afa_create); void 
mlxsw_afa_destroy(struct mlxsw_afa *mlxsw_afa) { + WARN_ON(!idr_is_empty(&mlxsw_afa->cookie_idr)); + idr_destroy(&mlxsw_afa->cookie_idr); + rhashtable_destroy(&mlxsw_afa->cookie_ht); rhashtable_destroy(&mlxsw_afa->fwd_entry_ht); rhashtable_destroy(&mlxsw_afa->set_ht); kfree(mlxsw_afa); @@ -627,6 +691,151 @@ err_counter_index_get: return ERR_PTR(err); } +/* 20 bits is a maximum that hardware can handle in trap with userdef action + * and carry along with the trapped packet. + */ +#define MLXSW_AFA_COOKIE_INDEX_BITS 20 +#define MLXSW_AFA_COOKIE_INDEX_MAX ((1 << MLXSW_AFA_COOKIE_INDEX_BITS) - 1) + +static struct mlxsw_afa_cookie * +mlxsw_afa_cookie_create(struct mlxsw_afa *mlxsw_afa, + const struct flow_action_cookie *fa_cookie) +{ + struct mlxsw_afa_cookie *cookie; + u32 cookie_index; + int err; + + cookie = kzalloc(sizeof(*cookie) + fa_cookie->cookie_len, GFP_KERNEL); + if (!cookie) + return ERR_PTR(-ENOMEM); + refcount_set(&cookie->ref_count, 1); + memcpy(&cookie->fa_cookie, fa_cookie, + sizeof(*fa_cookie) + fa_cookie->cookie_len); + + err = rhashtable_insert_fast(&mlxsw_afa->cookie_ht, &cookie->ht_node, + mlxsw_afa_cookie_ht_params); + if (err) + goto err_rhashtable_insert; + + /* Start cookie indexes with 1. Leave the 0 index unused. Packets + * that come from the HW which are not dropped by drop-with-cookie + * action are going to pass cookie_index 0 to lookup. 
+ */ + cookie_index = 1; + err = idr_alloc_u32(&mlxsw_afa->cookie_idr, cookie, &cookie_index, + MLXSW_AFA_COOKIE_INDEX_MAX, GFP_KERNEL); + if (err) + goto err_idr_alloc; + cookie->cookie_index = cookie_index; + return cookie; + +err_idr_alloc: + rhashtable_remove_fast(&mlxsw_afa->cookie_ht, &cookie->ht_node, + mlxsw_afa_cookie_ht_params); +err_rhashtable_insert: + kfree(cookie); + return ERR_PTR(err); +} + +static void mlxsw_afa_cookie_destroy(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_cookie *cookie) +{ + idr_remove(&mlxsw_afa->cookie_idr, cookie->cookie_index); + rhashtable_remove_fast(&mlxsw_afa->cookie_ht, &cookie->ht_node, + mlxsw_afa_cookie_ht_params); + kfree_rcu(cookie, rcu); +} + +static struct mlxsw_afa_cookie * +mlxsw_afa_cookie_get(struct mlxsw_afa *mlxsw_afa, + const struct flow_action_cookie *fa_cookie) +{ + struct mlxsw_afa_cookie *cookie; + + cookie = rhashtable_lookup_fast(&mlxsw_afa->cookie_ht, fa_cookie, + mlxsw_afa_cookie_ht_params); + if (cookie) { + refcount_inc(&cookie->ref_count); + return cookie; + } + return mlxsw_afa_cookie_create(mlxsw_afa, fa_cookie); +} + +static void mlxsw_afa_cookie_put(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_cookie *cookie) +{ + if (!refcount_dec_and_test(&cookie->ref_count)) + return; + mlxsw_afa_cookie_destroy(mlxsw_afa, cookie); +} + +/* RCU read lock must be held */ +const struct flow_action_cookie * +mlxsw_afa_cookie_lookup(struct mlxsw_afa *mlxsw_afa, u32 cookie_index) +{ + struct mlxsw_afa_cookie *cookie; + + /* 0 index means no cookie */ + if (!cookie_index) + return NULL; + cookie = idr_find(&mlxsw_afa->cookie_idr, cookie_index); + if (!cookie) + return NULL; + return &cookie->fa_cookie; +} +EXPORT_SYMBOL(mlxsw_afa_cookie_lookup); + +struct mlxsw_afa_cookie_ref { + struct mlxsw_afa_resource resource; + struct mlxsw_afa_cookie *cookie; +}; + +static void +mlxsw_afa_cookie_ref_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_cookie_ref *cookie_ref) +{ + 
mlxsw_afa_resource_del(&cookie_ref->resource); + mlxsw_afa_cookie_put(block->afa, cookie_ref->cookie); + kfree(cookie_ref); +} + +static void +mlxsw_afa_cookie_ref_destructor(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource) +{ + struct mlxsw_afa_cookie_ref *cookie_ref; + + cookie_ref = container_of(resource, struct mlxsw_afa_cookie_ref, + resource); + mlxsw_afa_cookie_ref_destroy(block, cookie_ref); +} + +static struct mlxsw_afa_cookie_ref * +mlxsw_afa_cookie_ref_create(struct mlxsw_afa_block *block, + const struct flow_action_cookie *fa_cookie) +{ + struct mlxsw_afa_cookie_ref *cookie_ref; + struct mlxsw_afa_cookie *cookie; + int err; + + cookie_ref = kzalloc(sizeof(*cookie_ref), GFP_KERNEL); + if (!cookie_ref) + return ERR_PTR(-ENOMEM); + cookie = mlxsw_afa_cookie_get(block->afa, fa_cookie); + if (IS_ERR(cookie)) { + err = PTR_ERR(cookie); + goto err_cookie_get; + } + cookie_ref->cookie = cookie; + cookie_ref->resource.destructor = mlxsw_afa_cookie_ref_destructor; + mlxsw_afa_resource_add(block, &cookie_ref->resource); + return cookie_ref; + +err_cookie_get: + kfree(cookie_ref); + return ERR_PTR(err); +} + #define MLXSW_AFA_ONE_ACTION_LEN 32 #define MLXSW_AFA_PAYLOAD_OFFSET 4 @@ -747,97 +956,170 @@ int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, } EXPORT_SYMBOL(mlxsw_afa_block_append_vlan_modify); -/* Trap / Discard Action - * --------------------- - * The Trap / Discard action enables trapping / mirroring packets to the CPU +/* Trap Action / Trap With Userdef Action + * -------------------------------------- + * The Trap action enables trapping / mirroring packets to the CPU * as well as discarding packets. * The ACL Trap / Discard separates the forward/discard control from CPU * trap control. In addition, the Trap / Discard action enables activating * SPAN (port mirroring). 
+ * + * The Trap with userdef action has the same functionality as + * the Trap action with the addition of a user defined value that can be set + * and used by higher layer applications. */ -#define MLXSW_AFA_TRAPDISC_CODE 0x03 -#define MLXSW_AFA_TRAPDISC_SIZE 1 +#define MLXSW_AFA_TRAP_CODE 0x03 +#define MLXSW_AFA_TRAP_SIZE 1 -enum mlxsw_afa_trapdisc_trap_action { - MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP = 0, - MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP = 2, +#define MLXSW_AFA_TRAPWU_CODE 0x04 +#define MLXSW_AFA_TRAPWU_SIZE 2 + +enum mlxsw_afa_trap_trap_action { + MLXSW_AFA_TRAP_TRAP_ACTION_NOP = 0, + MLXSW_AFA_TRAP_TRAP_ACTION_TRAP = 2, }; -/* afa_trapdisc_trap_action +/* afa_trap_trap_action * Trap Action. */ -MLXSW_ITEM32(afa, trapdisc, trap_action, 0x00, 24, 4); +MLXSW_ITEM32(afa, trap, trap_action, 0x00, 24, 4); -enum mlxsw_afa_trapdisc_forward_action { - MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD = 1, - MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD = 3, +enum mlxsw_afa_trap_forward_action { + MLXSW_AFA_TRAP_FORWARD_ACTION_FORWARD = 1, + MLXSW_AFA_TRAP_FORWARD_ACTION_DISCARD = 3, }; -/* afa_trapdisc_forward_action +/* afa_trap_forward_action * Forward Action. */ -MLXSW_ITEM32(afa, trapdisc, forward_action, 0x00, 0, 4); +MLXSW_ITEM32(afa, trap, forward_action, 0x00, 0, 4); -/* afa_trapdisc_trap_id +/* afa_trap_trap_id * Trap ID to configure. */ -MLXSW_ITEM32(afa, trapdisc, trap_id, 0x04, 0, 9); +MLXSW_ITEM32(afa, trap, trap_id, 0x04, 0, 9); -/* afa_trapdisc_mirror_agent +/* afa_trap_mirror_agent * Mirror agent. */ -MLXSW_ITEM32(afa, trapdisc, mirror_agent, 0x08, 29, 3); +MLXSW_ITEM32(afa, trap, mirror_agent, 0x08, 29, 3); -/* afa_trapdisc_mirror_enable +/* afa_trap_mirror_enable * Mirror enable. */ -MLXSW_ITEM32(afa, trapdisc, mirror_enable, 0x08, 24, 1); +MLXSW_ITEM32(afa, trap, mirror_enable, 0x08, 24, 1); + +/* user_def_val + * Value for the SW usage. Can be used to pass information of which + * rule has caused a trap. This may be overwritten by later traps. 
+ * This field does a set on the packet's user_def_val only if this + * is the first trap_id or if the trap_id has replaced the previous + * packet's trap_id. + */ +MLXSW_ITEM32(afa, trap, user_def_val, 0x0C, 0, 20); static inline void -mlxsw_afa_trapdisc_pack(char *payload, - enum mlxsw_afa_trapdisc_trap_action trap_action, - enum mlxsw_afa_trapdisc_forward_action forward_action, - u16 trap_id) +mlxsw_afa_trap_pack(char *payload, + enum mlxsw_afa_trap_trap_action trap_action, + enum mlxsw_afa_trap_forward_action forward_action, + u16 trap_id) { - mlxsw_afa_trapdisc_trap_action_set(payload, trap_action); - mlxsw_afa_trapdisc_forward_action_set(payload, forward_action); - mlxsw_afa_trapdisc_trap_id_set(payload, trap_id); + mlxsw_afa_trap_trap_action_set(payload, trap_action); + mlxsw_afa_trap_forward_action_set(payload, forward_action); + mlxsw_afa_trap_trap_id_set(payload, trap_id); } static inline void -mlxsw_afa_trapdisc_mirror_pack(char *payload, bool mirror_enable, - u8 mirror_agent) +mlxsw_afa_trapwu_pack(char *payload, + enum mlxsw_afa_trap_trap_action trap_action, + enum mlxsw_afa_trap_forward_action forward_action, + u16 trap_id, u32 user_def_val) { - mlxsw_afa_trapdisc_mirror_enable_set(payload, mirror_enable); - mlxsw_afa_trapdisc_mirror_agent_set(payload, mirror_agent); + mlxsw_afa_trap_pack(payload, trap_action, forward_action, trap_id); + mlxsw_afa_trap_user_def_val_set(payload, user_def_val); } -int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block) +static inline void +mlxsw_afa_trap_mirror_pack(char *payload, bool mirror_enable, + u8 mirror_agent) { - char *act = mlxsw_afa_block_append_action(block, - MLXSW_AFA_TRAPDISC_CODE, - MLXSW_AFA_TRAPDISC_SIZE); + mlxsw_afa_trap_mirror_enable_set(payload, mirror_enable); + mlxsw_afa_trap_mirror_agent_set(payload, mirror_agent); +} + +static int mlxsw_afa_block_append_drop_plain(struct mlxsw_afa_block *block, + bool ingress) +{ + char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_TRAP_CODE, + 
MLXSW_AFA_TRAP_SIZE); if (IS_ERR(act)) return PTR_ERR(act); - mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP, - MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, 0); + mlxsw_afa_trap_pack(act, MLXSW_AFA_TRAP_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAP_FORWARD_ACTION_DISCARD, + ingress ? MLXSW_TRAP_ID_DISCARD_INGRESS_ACL : + MLXSW_TRAP_ID_DISCARD_EGRESS_ACL); return 0; } + +static int +mlxsw_afa_block_append_drop_with_cookie(struct mlxsw_afa_block *block, + bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack) +{ + struct mlxsw_afa_cookie_ref *cookie_ref; + u32 cookie_index; + char *act; + int err; + + cookie_ref = mlxsw_afa_cookie_ref_create(block, fa_cookie); + if (IS_ERR(cookie_ref)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot create cookie for drop action"); + return PTR_ERR(cookie_ref); + } + cookie_index = cookie_ref->cookie->cookie_index; + + act = mlxsw_afa_block_append_action(block, MLXSW_AFA_TRAPWU_CODE, + MLXSW_AFA_TRAPWU_SIZE); + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append drop with cookie action"); + err = PTR_ERR(act); + goto err_append_action; + } + mlxsw_afa_trapwu_pack(act, MLXSW_AFA_TRAP_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAP_FORWARD_ACTION_DISCARD, + ingress ? MLXSW_TRAP_ID_DISCARD_INGRESS_ACL : + MLXSW_TRAP_ID_DISCARD_EGRESS_ACL, + cookie_index); + return 0; + +err_append_action: + mlxsw_afa_cookie_ref_destroy(block, cookie_ref); + return err; +} + +int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block, bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack) +{ + return fa_cookie ? 
+ mlxsw_afa_block_append_drop_with_cookie(block, ingress, + fa_cookie, extack) : + mlxsw_afa_block_append_drop_plain(block, ingress); +} EXPORT_SYMBOL(mlxsw_afa_block_append_drop); int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id) { - char *act = mlxsw_afa_block_append_action(block, - MLXSW_AFA_TRAPDISC_CODE, - MLXSW_AFA_TRAPDISC_SIZE); + char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_TRAP_CODE, + MLXSW_AFA_TRAP_SIZE); if (IS_ERR(act)) return PTR_ERR(act); - mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, - MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, - trap_id); + mlxsw_afa_trap_pack(act, MLXSW_AFA_TRAP_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAP_FORWARD_ACTION_DISCARD, trap_id); return 0; } EXPORT_SYMBOL(mlxsw_afa_block_append_trap); @@ -845,15 +1127,13 @@ EXPORT_SYMBOL(mlxsw_afa_block_append_trap); int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, u16 trap_id) { - char *act = mlxsw_afa_block_append_action(block, - MLXSW_AFA_TRAPDISC_CODE, - MLXSW_AFA_TRAPDISC_SIZE); + char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_TRAP_CODE, + MLXSW_AFA_TRAP_SIZE); if (IS_ERR(act)) return PTR_ERR(act); - mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, - MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, - trap_id); + mlxsw_afa_trap_pack(act, MLXSW_AFA_TRAP_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAP_FORWARD_ACTION_FORWARD, trap_id); return 0; } EXPORT_SYMBOL(mlxsw_afa_block_append_trap_and_forward); @@ -920,13 +1200,13 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block, u8 mirror_agent) { char *act = mlxsw_afa_block_append_action(block, - MLXSW_AFA_TRAPDISC_CODE, - MLXSW_AFA_TRAPDISC_SIZE); + MLXSW_AFA_TRAP_CODE, + MLXSW_AFA_TRAP_SIZE); if (IS_ERR(act)) return PTR_ERR(act); - mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP, - MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, 0); - mlxsw_afa_trapdisc_mirror_pack(act, true, mirror_agent); + mlxsw_afa_trap_pack(act, 
 MLXSW_AFA_TRAP_TRAP_ACTION_NOP, + MLXSW_AFA_TRAP_FORWARD_ACTION_FORWARD, 0); + mlxsw_afa_trap_mirror_pack(act, true, mirror_agent); return 0; } @@ -958,6 +1238,179 @@ err_append_allocated_mirror: } EXPORT_SYMBOL(mlxsw_afa_block_append_mirror); +/* QoS Action + * ---------- + * The QOS_ACTION is used for manipulating the QoS attributes of a packet. It + * can be used to change the DSCP, ECN, Color and Switch Priority of the packet. + * Note that PCP field can be changed using the VLAN action. + */ + +#define MLXSW_AFA_QOS_CODE 0x06 +#define MLXSW_AFA_QOS_SIZE 1 + +enum mlxsw_afa_qos_ecn_cmd { + /* Do nothing */ + MLXSW_AFA_QOS_ECN_CMD_NOP, + /* Set ECN to afa_qos_ecn */ + MLXSW_AFA_QOS_ECN_CMD_SET, +}; + +/* afa_qos_ecn_cmd + */ +MLXSW_ITEM32(afa, qos, ecn_cmd, 0x04, 29, 3); + +/* afa_qos_ecn + * ECN value. + */ +MLXSW_ITEM32(afa, qos, ecn, 0x04, 24, 2); + +enum mlxsw_afa_qos_dscp_cmd { + /* Do nothing */ + MLXSW_AFA_QOS_DSCP_CMD_NOP, + /* Set DSCP 3 LSB bits according to dscp[2:0] */ + MLXSW_AFA_QOS_DSCP_CMD_SET_3LSB, + /* Set DSCP 3 MSB bits according to dscp[5:3] */ + MLXSW_AFA_QOS_DSCP_CMD_SET_3MSB, + /* Set DSCP 6 bits according to dscp[5:0] */ + MLXSW_AFA_QOS_DSCP_CMD_SET_ALL, +}; + +/* afa_qos_dscp_cmd + * DSCP command. + */ +MLXSW_ITEM32(afa, qos, dscp_cmd, 0x04, 14, 2); + +/* afa_qos_dscp + * DSCP value. + */ +MLXSW_ITEM32(afa, qos, dscp, 0x04, 0, 6); + +enum mlxsw_afa_qos_switch_prio_cmd { + /* Do nothing */ + MLXSW_AFA_QOS_SWITCH_PRIO_CMD_NOP, + /* Set Switch Priority to afa_qos_switch_prio */ + MLXSW_AFA_QOS_SWITCH_PRIO_CMD_SET, +}; + +/* afa_qos_switch_prio_cmd + */ +MLXSW_ITEM32(afa, qos, switch_prio_cmd, 0x08, 14, 2); + +/* afa_qos_switch_prio + * Switch Priority. + */ +MLXSW_ITEM32(afa, qos, switch_prio, 0x08, 0, 4); + +enum mlxsw_afa_qos_dscp_rw { + MLXSW_AFA_QOS_DSCP_RW_PRESERVE, + MLXSW_AFA_QOS_DSCP_RW_SET, + MLXSW_AFA_QOS_DSCP_RW_CLEAR, +}; + +/* afa_qos_dscp_rw + * DSCP Re-write Enable. Controlling the rewrite_enable for DSCP. 
+ */ +MLXSW_ITEM32(afa, qos, dscp_rw, 0x0C, 30, 2); + +static inline void +mlxsw_afa_qos_ecn_pack(char *payload, + enum mlxsw_afa_qos_ecn_cmd ecn_cmd, u8 ecn) +{ + mlxsw_afa_qos_ecn_cmd_set(payload, ecn_cmd); + mlxsw_afa_qos_ecn_set(payload, ecn); +} + +static inline void +mlxsw_afa_qos_dscp_pack(char *payload, + enum mlxsw_afa_qos_dscp_cmd dscp_cmd, u8 dscp) +{ + mlxsw_afa_qos_dscp_cmd_set(payload, dscp_cmd); + mlxsw_afa_qos_dscp_set(payload, dscp); +} + +static inline void +mlxsw_afa_qos_switch_prio_pack(char *payload, + enum mlxsw_afa_qos_switch_prio_cmd prio_cmd, + u8 prio) +{ + mlxsw_afa_qos_switch_prio_cmd_set(payload, prio_cmd); + mlxsw_afa_qos_switch_prio_set(payload, prio); +} + +static int __mlxsw_afa_block_append_qos_dsfield(struct mlxsw_afa_block *block, + bool set_dscp, u8 dscp, + bool set_ecn, u8 ecn, + struct netlink_ext_ack *extack) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_QOS_CODE, + MLXSW_AFA_QOS_SIZE); + + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append QOS action"); + return PTR_ERR(act); + } + + if (set_ecn) + mlxsw_afa_qos_ecn_pack(act, MLXSW_AFA_QOS_ECN_CMD_SET, ecn); + if (set_dscp) { + mlxsw_afa_qos_dscp_pack(act, MLXSW_AFA_QOS_DSCP_CMD_SET_ALL, + dscp); + mlxsw_afa_qos_dscp_rw_set(act, MLXSW_AFA_QOS_DSCP_RW_CLEAR); + } + + return 0; +} + +int mlxsw_afa_block_append_qos_dsfield(struct mlxsw_afa_block *block, + u8 dsfield, + struct netlink_ext_ack *extack) +{ + return __mlxsw_afa_block_append_qos_dsfield(block, + true, dsfield >> 2, + true, dsfield & 0x03, + extack); +} +EXPORT_SYMBOL(mlxsw_afa_block_append_qos_dsfield); + +int mlxsw_afa_block_append_qos_dscp(struct mlxsw_afa_block *block, + u8 dscp, struct netlink_ext_ack *extack) +{ + return __mlxsw_afa_block_append_qos_dsfield(block, + true, dscp, + false, 0, + extack); +} +EXPORT_SYMBOL(mlxsw_afa_block_append_qos_dscp); + +int mlxsw_afa_block_append_qos_ecn(struct mlxsw_afa_block *block, + u8 ecn, struct netlink_ext_ack *extack) +{ + return 
__mlxsw_afa_block_append_qos_dsfield(block, + false, 0, + true, ecn, + extack); +} +EXPORT_SYMBOL(mlxsw_afa_block_append_qos_ecn); + +int mlxsw_afa_block_append_qos_switch_prio(struct mlxsw_afa_block *block, + u8 prio, + struct netlink_ext_ack *extack) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_QOS_CODE, + MLXSW_AFA_QOS_SIZE); + + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append QOS action"); + return PTR_ERR(act); + } + mlxsw_afa_qos_switch_prio_pack(act, MLXSW_AFA_QOS_SWITCH_PRIO_CMD_SET, + prio); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_qos_switch_prio); + /* Forwarding Action * ----------------- * Forwarding Action can be used to implement Policy Based Switching (PBS) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 0e3a59dda12e..8c2705e16ef7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -6,6 +6,7 @@ #include <linux/types.h> #include <linux/netdevice.h> +#include <net/flow_offload.h> struct mlxsw_afa; struct mlxsw_afa_block; @@ -42,7 +43,11 @@ int mlxsw_afa_block_activity_get(struct mlxsw_afa_block *block, bool *activity); int mlxsw_afa_block_continue(struct mlxsw_afa_block *block); int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block); -int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); +const struct flow_action_cookie * +mlxsw_afa_cookie_lookup(struct mlxsw_afa *mlxsw_afa, u32 cookie_index); +int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block, bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack); int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, u16 trap_id); @@ -57,6 +62,16 @@ int 
mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, u16 vid, u8 pcp, u8 et, struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_qos_switch_prio(struct mlxsw_afa_block *block, + u8 prio, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_qos_dsfield(struct mlxsw_afa_block *block, + u8 dsfield, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_qos_dscp(struct mlxsw_afa_block *block, + u8 dscp, struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_qos_ecn(struct mlxsw_afa_block *block, + u8 ecn, struct netlink_ext_ack *extack); int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block, u32 counter_index); int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c index feb4672a5ac0..9f6905fa6b47 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c @@ -9,6 +9,41 @@ #include "item.h" #include "core_acl_flex_keys.h" +/* For the purpose of the driver, define an internal storage scratchpad + * that will be used to store key/mask values. For each defined element type + * define an internal storage geometry. + * + * When adding new elements, MLXSW_AFK_ELEMENT_STORAGE_SIZE must be increased + * accordingly. 
+ */ +static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { + MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 16), + MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_32_47, 0x04, 2), + MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_0_31, 0x06, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_32_47, 0x0A, 2), + MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_0_31, 0x0C, 4), + MLXSW_AFK_ELEMENT_INFO_U32(ETHERTYPE, 0x00, 0, 16), + MLXSW_AFK_ELEMENT_INFO_U32(IP_PROTO, 0x10, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12), + MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3), + MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9), + MLXSW_AFK_ELEMENT_INFO_U32(DST_L4_PORT, 0x14, 0, 16), + MLXSW_AFK_ELEMENT_INFO_U32(SRC_L4_PORT, 0x14, 16, 16), + MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2), + MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6), + MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_8_10, 0x18, 17, 3), + MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_0_7, 0x18, 20, 8), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_0_31, 0x2C, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_96_127, 0x30, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_64_95, 0x34, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_32_63, 0x38, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_0_31, 0x3C, 4), +}; + struct mlxsw_afk { struct list_head key_info_list; unsigned int max_blocks; @@ -26,13 +61,15 @@ static bool mlxsw_afk_blocks_check(struct mlxsw_afk *mlxsw_afk) const struct mlxsw_afk_block *block = &mlxsw_afk->blocks[i]; for (j = 0; j < block->instances_count; j++) { + const struct mlxsw_afk_element_info *elinfo; struct mlxsw_afk_element_inst *elinst; elinst = &block->instances[j]; - if (elinst->type != elinst->info->type || + elinfo = &mlxsw_afk_element_infos[elinst->element]; + if (elinst->type != elinfo->type || (!elinst->avoid_size_check && elinst->item.size.bits != - 
elinst->info->item.size.bits)) + elinfo->item.size.bits)) return false; } } @@ -72,7 +109,7 @@ struct mlxsw_afk_key_info { * is index inside "blocks" */ struct mlxsw_afk_element_usage elusage; - const struct mlxsw_afk_block *blocks[0]; + const struct mlxsw_afk_block *blocks[]; }; static bool @@ -116,7 +153,7 @@ static void mlxsw_afk_picker_count_hits(struct mlxsw_afk *mlxsw_afk, struct mlxsw_afk_element_inst *elinst; elinst = &block->instances[j]; - if (elinst->info->element == element) { + if (elinst->element == element) { __set_bit(element, picker->hits[i].element); picker->hits[i].total++; } @@ -301,7 +338,7 @@ mlxsw_afk_block_elinst_get(const struct mlxsw_afk_block *block, struct mlxsw_afk_element_inst *elinst; elinst = &block->instances[i]; - if (elinst->info->element == element) + if (elinst->element == element) return elinst; } return NULL; @@ -409,9 +446,12 @@ static void mlxsw_sp_afk_encode_one(const struct mlxsw_afk_element_inst *elinst, char *output, char *storage, int u32_diff) { - const struct mlxsw_item *storage_item = &elinst->info->item; const struct mlxsw_item *output_item = &elinst->item; + const struct mlxsw_afk_element_info *elinfo; + const struct mlxsw_item *storage_item; + elinfo = &mlxsw_afk_element_infos[elinst->element]; + storage_item = &elinfo->item; if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32) mlxsw_sp_afk_encode_u32(storage_item, output_item, storage, output, u32_diff); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index cb229b55ecc4..a47a17c04c62 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -69,42 +69,10 @@ struct mlxsw_afk_element_info { MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_BUF, \ _element, _offset, 0, _size) -/* For the purpose of the driver, define an internal storage scratchpad - * that will be used to store key/mask values. 
For each defined element type - * define an internal storage geometry. - */ -static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { - MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 16), - MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_32_47, 0x04, 2), - MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_0_31, 0x06, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_32_47, 0x0A, 2), - MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_0_31, 0x0C, 4), - MLXSW_AFK_ELEMENT_INFO_U32(ETHERTYPE, 0x00, 0, 16), - MLXSW_AFK_ELEMENT_INFO_U32(IP_PROTO, 0x10, 0, 8), - MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12), - MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3), - MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9), - MLXSW_AFK_ELEMENT_INFO_U32(DST_L4_PORT, 0x14, 0, 16), - MLXSW_AFK_ELEMENT_INFO_U32(SRC_L4_PORT, 0x14, 16, 16), - MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8), - MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2), - MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6), - MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_8_10, 0x18, 17, 3), - MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_0_7, 0x18, 20, 8), - MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_0_31, 0x2C, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_96_127, 0x30, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_64_95, 0x34, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_32_63, 0x38, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_0_31, 0x3C, 4), -}; - #define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x40 struct mlxsw_afk_element_inst { /* element instance in actual block */ - const struct mlxsw_afk_element_info *info; + enum mlxsw_afk_element element; enum mlxsw_afk_element_type type; struct mlxsw_item item; /* element geometry in block */ int u32_key_diff; /* in case value needs to be adjusted before write @@ -116,7 +84,7 @@ struct mlxsw_afk_element_inst { /* element instance in actual block */ #define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset, \ _shift, _size, 
_u32_key_diff, _avoid_size_check) \ { \ - .info = &mlxsw_afk_element_infos[MLXSW_AFK_ELEMENT_##_element], \ + .element = MLXSW_AFK_ELEMENT_##_element, \ .type = _type, \ .item = { \ .offset = _offset, \ diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 34566eb62c47..939b692ffc33 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -53,6 +53,7 @@ /** * struct mlxsw_i2c - device private data: + * @cmd: command attributes; * @cmd.mb_size_in: input mailbox size; * @cmd.mb_off_in: input mailbox offset in register space; * @cmd.mb_size_out: output mailbox size; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index e9ded1a6e131..fd0e97de44e7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -575,6 +575,15 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe); + if (rx_info.trap_id == MLXSW_TRAP_ID_DISCARD_INGRESS_ACL || + rx_info.trap_id == MLXSW_TRAP_ID_DISCARD_EGRESS_ACL) { + u32 cookie_index = 0; + + if (mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) + cookie_index = mlxsw_pci_cqe2_user_def_val_orig_pkt_len_get(cqe); + mlxsw_skb_cb(skb)->cookie_index = cookie_index; + } + byte_count = mlxsw_pci_cqe_byte_count_get(cqe); if (mlxsw_pci_cqe_crc_get(cqe_v, cqe)) byte_count -= ETH_FCS_LEN; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 43fa8c85b5d9..32c7cabfb261 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -25,8 +25,6 @@ #define MLXSW_PCI_CIR_CTRL_STATUS_SHIFT 24 #define MLXSW_PCI_CIR_TIMEOUT_MSECS 1000 -#define MLXSW_PCI_SW_RESET 0xF0010 -#define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) #define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 900000 #define MLXSW_PCI_SW_RESET_WAIT_MSECS 200 
#define MLXSW_PCI_FW_READY 0xA1844 @@ -210,6 +208,11 @@ MLXSW_ITEM32(pci, cqe0, dqn, 0x0C, 1, 5); MLXSW_ITEM32(pci, cqe12, dqn, 0x0C, 1, 6); mlxsw_pci_cqe_item_helpers(dqn, 0, 12, 12); +/* pci_cqe_user_def_val_orig_pkt_len + * When trap_id is an ACL: User defined value from policy engine action. + */ +MLXSW_ITEM32(pci, cqe2, user_def_val_orig_pkt_len, 0x14, 0, 20); + /* pci_cqe_owner * Ownership bit. */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index e05d1d1be2fd..9b39b8e70519 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -621,7 +621,7 @@ static inline void mlxsw_reg_sfn_pack(char *payload) { MLXSW_REG_ZERO(sfn, payload); mlxsw_reg_sfn_swid_set(payload, 0); - mlxsw_reg_sfn_end_set(payload, 1); + mlxsw_reg_sfn_end_set(payload, 0); mlxsw_reg_sfn_num_rec_set(payload, MLXSW_REG_SFN_REC_MAX_COUNT); } @@ -3296,6 +3296,12 @@ MLXSW_ITEM32(reg, qpcr, g, 0x00, 14, 2); */ MLXSW_ITEM32(reg, qpcr, pid, 0x00, 0, 14); +/* reg_qpcr_clear_counter + * Clear counters. + * Access: OP + */ +MLXSW_ITEM32(reg, qpcr, clear_counter, 0x04, 31, 1); + /* reg_qpcr_color_aware * Is the policer aware of colors. * Must be 0 (unaware) for cpu port. @@ -3393,6 +3399,17 @@ enum mlxsw_reg_qpcr_action { */ MLXSW_ITEM32(reg, qpcr, violate_action, 0x18, 0, 4); +/* reg_qpcr_violate_count + * Counts the number of times violate_action happened on this PID. 
+ * Access: RW + */ +MLXSW_ITEM64(reg, qpcr, violate_count, 0x20, 0, 64); + +#define MLXSW_REG_QPCR_LOWEST_CIR 1 +#define MLXSW_REG_QPCR_HIGHEST_CIR (2 * 1000 * 1000 * 1000) /* 2Gpps */ +#define MLXSW_REG_QPCR_LOWEST_CBS 4 +#define MLXSW_REG_QPCR_HIGHEST_CBS 24 + static inline void mlxsw_reg_qpcr_pack(char *payload, u16 pid, enum mlxsw_reg_qpcr_ir_units ir_units, bool bytes, u32 cir, u16 cbs) @@ -5440,15 +5457,29 @@ enum mlxsw_reg_pmtm_module_type { /* Backplane with 4 lanes */ MLXSW_REG_PMTM_MODULE_TYPE_BP_4X, /* QSFP */ - MLXSW_REG_PMTM_MODULE_TYPE_BP_QSFP, + MLXSW_REG_PMTM_MODULE_TYPE_QSFP, /* SFP */ - MLXSW_REG_PMTM_MODULE_TYPE_BP_SFP, + MLXSW_REG_PMTM_MODULE_TYPE_SFP, /* Backplane with single lane */ MLXSW_REG_PMTM_MODULE_TYPE_BP_1X = 4, /* Backplane with two lane */ MLXSW_REG_PMTM_MODULE_TYPE_BP_2X = 8, - /* Chip2Chip */ - MLXSW_REG_PMTM_MODULE_TYPE_C2C = 10, + /* Chip2Chip4x */ + MLXSW_REG_PMTM_MODULE_TYPE_C2C4X = 10, + /* Chip2Chip2x */ + MLXSW_REG_PMTM_MODULE_TYPE_C2C2X, + /* Chip2Chip1x */ + MLXSW_REG_PMTM_MODULE_TYPE_C2C1X, + /* QSFP-DD */ + MLXSW_REG_PMTM_MODULE_TYPE_QSFP_DD = 14, + /* OSFP */ + MLXSW_REG_PMTM_MODULE_TYPE_OSFP, + /* SFP-DD */ + MLXSW_REG_PMTM_MODULE_TYPE_SFP_DD, + /* DSFP */ + MLXSW_REG_PMTM_MODULE_TYPE_DSFP, + /* Chip2Chip8x */ + MLXSW_REG_PMTM_MODULE_TYPE_C2C8X, }; /* reg_pmtm_module_type @@ -5506,12 +5537,10 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM, MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST, MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP, - MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS, MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP, MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE, MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME, MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP, - MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF, MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, @@ -5526,9 +5555,11 @@ enum mlxsw_reg_htgt_trap_group { enum mlxsw_reg_htgt_discard_trap_group { MLXSW_REG_HTGT_DISCARD_TRAP_GROUP_BASE = 
MLXSW_REG_HTGT_TRAP_GROUP_MAX, + MLXSW_REG_HTGT_TRAP_GROUP_SP_DUMMY, MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS, MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS, MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS, }; /* reg_htgt_trap_group diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 6534184cb942..d62496ef299c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -18,6 +18,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_CQE_V1, MLXSW_RES_ID_CQE_V2, MLXSW_RES_ID_COUNTER_POOL_SIZE, + MLXSW_RES_ID_COUNTER_BANK_SIZE, MLXSW_RES_ID_MAX_SPAN, MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES, MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC, @@ -75,6 +76,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_CQE_V1] = 0x2211, [MLXSW_RES_ID_CQE_V2] = 0x2212, [MLXSW_RES_ID_COUNTER_POOL_SIZE] = 0x2410, + [MLXSW_RES_ID_COUNTER_BANK_SIZE] = 0x2411, [MLXSW_RES_ID_MAX_SPAN] = 0x2420, [MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES] = 0x2443, [MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC] = 0x2449, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 7358b5bc7eb6..24ca8d5bc564 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -43,6 +43,7 @@ #include "spectrum_acl_flex_actions.h" #include "spectrum_span.h" #include "spectrum_ptp.h" +#include "spectrum_trap.h" #include "../mlxfw/mlxfw.h" #define MLXSW_SP1_FWREV_MAJOR 13 @@ -347,19 +348,6 @@ static void mlxsw_sp_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); } -static void mlxsw_sp_status_notify(struct mlxfw_dev *mlxfw_dev, - const char *msg, const char *comp_name, - u32 done_bytes, u32 total_bytes) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct 
mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - - devlink_flash_update_status_notify(priv_to_devlink(mlxsw_sp->core), - msg, comp_name, - done_bytes, total_bytes); -} - static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = { .component_query = mlxsw_sp_component_query, .fsm_lock = mlxsw_sp_fsm_lock, @@ -370,7 +358,6 @@ static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = { .fsm_query_state = mlxsw_sp_fsm_query_state, .fsm_cancel = mlxsw_sp_fsm_cancel, .fsm_release = mlxsw_sp_fsm_release, - .status_notify = mlxsw_sp_status_notify, }; static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, @@ -382,16 +369,15 @@ static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, .ops = &mlxsw_sp_mlxfw_dev_ops, .psid = mlxsw_sp->bus_info->psid, .psid_size = strlen(mlxsw_sp->bus_info->psid), + .devlink = priv_to_devlink(mlxsw_sp->core), }, .mlxsw_sp = mlxsw_sp }; int err; mlxsw_core_fw_flash_start(mlxsw_sp->core); - devlink_flash_update_begin_notify(priv_to_devlink(mlxsw_sp->core)); err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware, extack); - devlink_flash_update_end_notify(priv_to_devlink(mlxsw_sp->core)); mlxsw_core_fw_flash_end(mlxsw_sp->core); return err; @@ -1798,6 +1784,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, return mlxsw_sp_setup_tc_ets(mlxsw_sp_port, type_data); case TC_SETUP_QDISC_TBF: return mlxsw_sp_setup_tc_tbf(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_FIFO: + return mlxsw_sp_setup_tc_fifo(mlxsw_sp_port, type_data); default: return -EOPNOTSUPP; } @@ -2243,6 +2231,15 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_rfc_3635_stats[] = { #define MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN \ ARRAY_SIZE(mlxsw_sp_port_hw_rfc_3635_stats) +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_ext_stats[] = { + { + .str = "ecn_marked", + .getter = mlxsw_reg_ppcnt_ecn_marked_get, + }, +}; + +#define MLXSW_SP_PORT_HW_EXT_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_ext_stats) + static struct 
mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_discard_stats[] = { { .str = "discard_ingress_general", @@ -2352,6 +2349,7 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = { MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN + \ MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN + \ MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN + \ + MLXSW_SP_PORT_HW_EXT_STATS_LEN + \ MLXSW_SP_PORT_HW_DISCARD_STATS_LEN + \ (MLXSW_SP_PORT_HW_PRIO_STATS_LEN * \ IEEE_8021QAZ_MAX_TCS) + \ @@ -2413,6 +2411,12 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev, p += ETH_GSTRING_LEN; } + for (i = 0; i < MLXSW_SP_PORT_HW_EXT_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_hw_ext_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < MLXSW_SP_PORT_HW_DISCARD_STATS_LEN; i++) { memcpy(p, mlxsw_sp_port_hw_discard_stats[i].str, ETH_GSTRING_LEN); @@ -2474,6 +2478,10 @@ mlxsw_sp_get_hw_stats_by_group(struct mlxsw_sp_port_hw_stats **p_hw_stats, *p_hw_stats = mlxsw_sp_port_hw_rfc_3635_stats; *p_len = MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN; break; + case MLXSW_REG_PPCNT_EXT_CNT: + *p_hw_stats = mlxsw_sp_port_hw_ext_stats; + *p_len = MLXSW_SP_PORT_HW_EXT_STATS_LEN; + break; case MLXSW_REG_PPCNT_DISCARD_CNT: *p_hw_stats = mlxsw_sp_port_hw_discard_stats; *p_len = MLXSW_SP_PORT_HW_DISCARD_STATS_LEN; @@ -2543,6 +2551,11 @@ static void mlxsw_sp_port_get_stats(struct net_device *dev, data, data_index); data_index += MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN; + /* Extended Counters */ + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_EXT_CNT, 0, + data, data_index); + data_index += MLXSW_SP_PORT_HW_EXT_STATS_LEN; + /* Discard Counters */ __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_DISCARD_CNT, 0, data, data_index); @@ -2788,27 +2801,6 @@ static u32 mlxsw_sp1_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, u8 width, return ptys_proto; } -static u32 -mlxsw_sp1_to_ptys_upper_speed(struct mlxsw_sp *mlxsw_sp, u32 upper_speed) -{ - u32 ptys_proto = 0; - int i; - - for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { - 
if (mlxsw_sp1_port_link_mode[i].speed <= upper_speed) - ptys_proto |= mlxsw_sp1_port_link_mode[i].mask; - } - return ptys_proto; -} - -static int -mlxsw_sp1_port_speed_base(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u32 *base_speed) -{ - *base_speed = MLXSW_SP_PORT_BASE_SPEED_25G; - return 0; -} - static void mlxsw_sp1_reg_ptys_eth_pack(struct mlxsw_sp *mlxsw_sp, char *payload, u8 local_port, u32 proto_admin, bool autoneg) @@ -2833,8 +2825,6 @@ mlxsw_sp1_port_type_speed_ops = { .from_ptys_speed_duplex = mlxsw_sp1_from_ptys_speed_duplex, .to_ptys_advert_link = mlxsw_sp1_to_ptys_advert_link, .to_ptys_speed = mlxsw_sp1_to_ptys_speed, - .to_ptys_upper_speed = mlxsw_sp1_to_ptys_upper_speed, - .port_speed_base = mlxsw_sp1_port_speed_base, .reg_ptys_eth_pack = mlxsw_sp1_reg_ptys_eth_pack, .reg_ptys_eth_unpack = mlxsw_sp1_reg_ptys_eth_unpack, }; @@ -3235,51 +3225,6 @@ static u32 mlxsw_sp2_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, return ptys_proto; } -static u32 -mlxsw_sp2_to_ptys_upper_speed(struct mlxsw_sp *mlxsw_sp, u32 upper_speed) -{ - u32 ptys_proto = 0; - int i; - - for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { - if (mlxsw_sp2_port_link_mode[i].speed <= upper_speed) - ptys_proto |= mlxsw_sp2_port_link_mode[i].mask; - } - return ptys_proto; -} - -static int -mlxsw_sp2_port_speed_base(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u32 *base_speed) -{ - char ptys_pl[MLXSW_REG_PTYS_LEN]; - u32 eth_proto_cap; - int err; - - /* In Spectrum-2, the speed of 1x can change from port to port, so query - * it from firmware. 
- */ - mlxsw_reg_ptys_ext_eth_pack(ptys_pl, local_port, 0, false); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); - if (err) - return err; - mlxsw_reg_ptys_ext_eth_unpack(ptys_pl, &eth_proto_cap, NULL, NULL); - - if (eth_proto_cap & - MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_1_LAUI_1_50GBASE_CR_KR) { - *base_speed = MLXSW_SP_PORT_BASE_SPEED_50G; - return 0; - } - - if (eth_proto_cap & - MLXSW_REG_PTYS_EXT_ETH_SPEED_25GAUI_1_25GBASE_CR_KR) { - *base_speed = MLXSW_SP_PORT_BASE_SPEED_25G; - return 0; - } - - return -EIO; -} - static void mlxsw_sp2_reg_ptys_eth_pack(struct mlxsw_sp *mlxsw_sp, char *payload, u8 local_port, u32 proto_admin, @@ -3305,8 +3250,6 @@ mlxsw_sp2_port_type_speed_ops = { .from_ptys_speed_duplex = mlxsw_sp2_from_ptys_speed_duplex, .to_ptys_advert_link = mlxsw_sp2_to_ptys_advert_link, .to_ptys_speed = mlxsw_sp2_to_ptys_speed, - .to_ptys_upper_speed = mlxsw_sp2_to_ptys_upper_speed, - .port_speed_base = mlxsw_sp2_port_speed_base, .reg_ptys_eth_pack = mlxsw_sp2_reg_ptys_eth_pack, .reg_ptys_eth_unpack = mlxsw_sp2_reg_ptys_eth_unpack, }; @@ -3520,24 +3463,24 @@ static int mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u32 eth_proto_cap, eth_proto_admin, eth_proto_oper; const struct mlxsw_sp_port_type_speed_ops *ops; char ptys_pl[MLXSW_REG_PTYS_LEN]; - u32 eth_proto_admin; - u32 upper_speed; - u32 base_speed; int err; ops = mlxsw_sp->port_type_speed_ops; - err = ops->port_speed_base(mlxsw_sp, mlxsw_sp_port->local_port, - &base_speed); + /* Set advertised speeds to supported speeds. 
*/ + ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, + 0, false); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); if (err) return err; - upper_speed = base_speed * mlxsw_sp_port->mapping.width; - eth_proto_admin = ops->to_ptys_upper_speed(mlxsw_sp, upper_speed); + ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, &eth_proto_cap, + &eth_proto_admin, &eth_proto_oper); ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, - eth_proto_admin, mlxsw_sp_port->link.autoneg); + eth_proto_cap, mlxsw_sp_port->link.autoneg); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); } @@ -4614,6 +4557,7 @@ static const struct mlxsw_listener mlxsw_sp1_listener[] = { static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) { + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); char qpcr_pl[MLXSW_REG_QPCR_LEN]; enum mlxsw_reg_qpcr_ir_units ir_units; int max_cpu_policers; @@ -4636,7 +4580,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: - case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR: rate = 128; burst_size = 7; @@ -4649,7 +4592,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP: - case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: @@ -4677,6 +4619,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) continue; } + __set_bit(i, mlxsw_sp->trap->policers_usage); mlxsw_reg_qpcr_pack(qpcr_pl, i, ir_units, is_bytes, rate, burst_size); err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(qpcr), qpcr_pl); @@ -4729,19 +4672,20 @@ static int 
mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: - case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1: priority = 2; tc = 2; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST: - case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR: priority = 1; tc = 1; break; + case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR: + priority = 0; + tc = 1; + break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT: priority = MLXSW_REG_HTGT_DEFAULT_PRIORITY; tc = MLXSW_REG_HTGT_DEFAULT_TC; @@ -4805,20 +4749,32 @@ static void mlxsw_sp_traps_unregister(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_sp_trap *trap; + u64 max_policers; int err; + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_CPU_POLICERS)) + return -EIO; + max_policers = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_CPU_POLICERS); + trap = kzalloc(struct_size(trap, policers_usage, + BITS_TO_LONGS(max_policers)), GFP_KERNEL); + if (!trap) + return -ENOMEM; + trap->max_policers = max_policers; + mlxsw_sp->trap = trap; + err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core); if (err) - return err; + goto err_cpu_policers_set; err = mlxsw_sp_trap_groups_set(mlxsw_sp->core); if (err) - return err; + goto err_trap_groups_set; err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener, ARRAY_SIZE(mlxsw_sp_listener)); if (err) - return err; + goto err_traps_register; err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp->listeners, mlxsw_sp->listeners_count); @@ -4830,6 +4786,10 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) err_extra_traps_init: mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, ARRAY_SIZE(mlxsw_sp_listener)); +err_traps_register: +err_trap_groups_set: +err_cpu_policers_set: + kfree(trap); return err; } @@ -4839,6 +4799,7 @@ 
static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp->listeners_count); mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, ARRAY_SIZE(mlxsw_sp_listener)); + kfree(mlxsw_sp->trap); } #define MLXSW_SP_LAG_SEED_INIT 0xcafecafe @@ -4935,16 +4896,35 @@ static const struct mlxsw_sp_span_ops mlxsw_sp1_span_ops = { }; #define MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR 38 +#define MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR 50 + +static u32 __mlxsw_sp_span_buffsize_get(int mtu, u32 speed, u32 buffer_factor) +{ + return 3 * mtu + buffer_factor * speed / 1000; +} static u32 mlxsw_sp2_span_buffsize_get(int mtu, u32 speed) { - return 3 * mtu + MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR * speed / 1000; + int factor = MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR; + + return __mlxsw_sp_span_buffsize_get(mtu, speed, factor); } static const struct mlxsw_sp_span_ops mlxsw_sp2_span_ops = { .buffsize_get = mlxsw_sp2_span_buffsize_get, }; +static u32 mlxsw_sp3_span_buffsize_get(int mtu, u32 speed) +{ + int factor = MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR; + + return __mlxsw_sp_span_buffsize_get(mtu, speed, factor); +} + +static const struct mlxsw_sp_span_ops mlxsw_sp3_span_ops = { + .buffsize_get = mlxsw_sp3_span_buffsize_get, +}; + u32 mlxsw_sp_span_buffsize_get(struct mlxsw_sp *mlxsw_sp, int mtu, u32 speed) { u32 buffsize = mlxsw_sp->span_ops->buffsize_get(speed, mtu); @@ -5223,7 +5203,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; - mlxsw_sp->span_ops = &mlxsw_sp2_span_ops; + mlxsw_sp->span_ops = &mlxsw_sp3_span_ops; mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); @@ -5460,8 +5440,13 @@ static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core) if (err) goto err_resources_span_register; + err = mlxsw_sp_counter_resources_register(mlxsw_core); 
+ if (err) + goto err_resources_counter_register; + return 0; +err_resources_counter_register: err_resources_span_register: devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); return err; @@ -5479,8 +5464,13 @@ static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core) if (err) goto err_resources_span_register; + err = mlxsw_sp_counter_resources_register(mlxsw_core); + if (err) + goto err_resources_counter_register; + return 0; +err_resources_counter_register: err_resources_span_register: devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); return err; @@ -5684,6 +5674,11 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .trap_fini = mlxsw_sp_trap_fini, .trap_action_set = mlxsw_sp_trap_action_set, .trap_group_init = mlxsw_sp_trap_group_init, + .trap_group_set = mlxsw_sp_trap_group_set, + .trap_policer_init = mlxsw_sp_trap_policer_init, + .trap_policer_fini = mlxsw_sp_trap_policer_fini, + .trap_policer_set = mlxsw_sp_trap_policer_set, + .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp1_resources_register, .kvd_sizes_get = mlxsw_sp_kvd_sizes_get, @@ -5718,6 +5713,11 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .trap_fini = mlxsw_sp_trap_fini, .trap_action_set = mlxsw_sp_trap_action_set, .trap_group_init = mlxsw_sp_trap_group_init, + .trap_group_set = mlxsw_sp_trap_group_set, + .trap_policer_init = mlxsw_sp_trap_policer_init, + .trap_policer_fini = mlxsw_sp_trap_policer_fini, + .trap_policer_set = mlxsw_sp_trap_policer_set, + .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, .params_register = mlxsw_sp2_params_register, @@ -5751,6 +5751,11 @@ static struct mlxsw_driver mlxsw_sp3_driver = { .trap_fini = mlxsw_sp_trap_fini, .trap_action_set = mlxsw_sp_trap_action_set, .trap_group_init = mlxsw_sp_trap_group_init, + .trap_group_set 
= mlxsw_sp_trap_group_set, + .trap_policer_init = mlxsw_sp_trap_policer_init, + .trap_policer_fini = mlxsw_sp_trap_policer_fini, + .trap_policer_set = mlxsw_sp_trap_policer_set, + .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, .params_register = mlxsw_sp2_params_register, @@ -6316,7 +6321,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, return -EINVAL; } if (netif_is_macvlan(upper_dev) && - !mlxsw_sp_rif_find_by_dev(mlxsw_sp, lower_dev)) { + !mlxsw_sp_rif_exists(mlxsw_sp, lower_dev)) { NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); return -EOPNOTSUPP; } @@ -6472,7 +6477,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, return -EINVAL; } if (netif_is_macvlan(upper_dev) && - !mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan_dev)) { + !mlxsw_sp_rif_exists(mlxsw_sp, vlan_dev)) { NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); return -EOPNOTSUPP; } @@ -6549,7 +6554,7 @@ static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev, if (!info->linking) break; if (netif_is_macvlan(upper_dev) && - !mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan_dev)) { + !mlxsw_sp_rif_exists(mlxsw_sp, vlan_dev)) { NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); return -EOPNOTSUPP; } @@ -6609,7 +6614,7 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, if (!info->linking) break; if (netif_is_macvlan(upper_dev) && - !mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev)) { + !mlxsw_sp_rif_exists(mlxsw_sp, br_dev)) { NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a0f1f9dceec5..ca56e72cb4b7 100644 --- 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -19,6 +19,7 @@ #include <net/pkt_cls.h> #include <net/red.h> #include <net/vxlan.h> +#include <net/flow_offload.h> #include "port.h" #include "core.h" @@ -32,9 +33,6 @@ #define MLXSW_SP_MID_MAX 7000 -#define MLXSW_SP_PORT_BASE_SPEED_25G 25000 /* Mb/s */ -#define MLXSW_SP_PORT_BASE_SPEED_50G 50000 /* Mb/s */ - #define MLXSW_SP_KVD_LINEAR_SIZE 98304 /* entries */ #define MLXSW_SP_KVD_GRANULARITY 128 @@ -48,6 +46,10 @@ #define MLXSW_SP_RESOURCE_NAME_SPAN "span_agents" +#define MLXSW_SP_RESOURCE_NAME_COUNTERS "counters" +#define MLXSW_SP_RESOURCE_NAME_COUNTERS_FLOW "flow" +#define MLXSW_SP_RESOURCE_NAME_COUNTERS_RIF "rif" + enum mlxsw_sp_resource_id { MLXSW_SP_RESOURCE_KVD = 1, MLXSW_SP_RESOURCE_KVD_LINEAR, @@ -57,6 +59,9 @@ enum mlxsw_sp_resource_id { MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS, MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS, MLXSW_SP_RESOURCE_SPAN, + MLXSW_SP_RESOURCE_COUNTERS, + MLXSW_SP_RESOURCE_COUNTERS_FLOW, + MLXSW_SP_RESOURCE_COUNTERS_RIF, }; struct mlxsw_sp_port; @@ -141,6 +146,7 @@ struct mlxsw_sp_port_type_speed_ops; struct mlxsw_sp_ptp_state; struct mlxsw_sp_ptp_ops; struct mlxsw_sp_span_ops; +struct mlxsw_sp_qdisc_state; struct mlxsw_sp_port_mapping { u8 module; @@ -168,12 +174,9 @@ struct mlxsw_sp { struct notifier_block netdevice_nb; struct mlxsw_sp_ptp_clock *clock; struct mlxsw_sp_ptp_state *ptp_state; - struct mlxsw_sp_counter_pool *counter_pool; - struct { - struct mlxsw_sp_span_entry *entries; - int entries_count; - } span; + struct mlxsw_sp_span *span; + struct mlxsw_sp_trap *trap; const struct mlxsw_fw_rev *req_rev; const char *fw_filename; const struct mlxsw_sp_kvdl_ops *kvdl_ops; @@ -282,8 +285,7 @@ struct mlxsw_sp_port { struct mlxsw_sp_port_sample *sample; struct list_head vlans_list; struct mlxsw_sp_port_vlan *default_vlan; - struct mlxsw_sp_qdisc *root_qdisc; - struct mlxsw_sp_qdisc *tclass_qdiscs; + struct mlxsw_sp_qdisc_state 
*qdisc; unsigned acl_rule_count; struct mlxsw_sp_acl_block *ing_acl_block; struct mlxsw_sp_acl_block *eg_acl_block; @@ -313,9 +315,6 @@ struct mlxsw_sp_port_type_speed_ops { u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, u8 width, const struct ethtool_link_ksettings *cmd); u32 (*to_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u8 width, u32 speed); - u32 (*to_ptys_upper_speed)(struct mlxsw_sp *mlxsw_sp, u32 upper_speed); - int (*port_speed_base)(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u32 *base_speed); void (*reg_ptys_eth_pack)(struct mlxsw_sp *mlxsw_sp, char *payload, u8 local_port, u32 proto_admin, bool autoneg); void (*reg_ptys_eth_unpack)(struct mlxsw_sp *mlxsw_sp, char *payload, @@ -468,10 +467,6 @@ int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack); void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, const struct net_device *vxlan_dev); -struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, - const struct net_device *br_dev, - u16 vid, - struct netlink_ext_ack *extack); extern struct notifier_block mlxsw_sp_switchdev_notifier; /* spectrum.c */ @@ -556,7 +551,7 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); -bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp, +bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, struct net_device *l3_dev, @@ -571,10 +566,10 @@ void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, struct net_device *dev); -struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev); +bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp, + const 
struct net_device *dev); +u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); -struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif); int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, enum mlxsw_sp_l3proto ul_proto, const union mlxsw_sp_l3addr *ul_sip, @@ -653,7 +648,9 @@ struct mlxsw_sp_acl_rule_info { struct mlxsw_afk_element_values values; struct mlxsw_afa_block *act_block; u8 action_created:1, - egress_bind_blocker:1; + ingress_bind_blocker:1, + egress_bind_blocker:1, + counter_valid:1; unsigned int counter_index; }; @@ -672,16 +669,20 @@ struct mlxsw_sp_acl_block { struct mlxsw_sp *mlxsw_sp; unsigned int rule_count; unsigned int disable_count; + unsigned int ingress_blocker_rule_count; unsigned int egress_blocker_rule_count; + unsigned int ingress_binding_count; + unsigned int egress_binding_count; struct net *net; }; struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block); -unsigned int mlxsw_sp_acl_block_rule_count(struct mlxsw_sp_acl_block *block); +unsigned int +mlxsw_sp_acl_block_rule_count(const struct mlxsw_sp_acl_block *block); void mlxsw_sp_acl_block_disable_inc(struct mlxsw_sp_acl_block *block); void mlxsw_sp_acl_block_disable_dec(struct mlxsw_sp_acl_block *block); -bool mlxsw_sp_acl_block_disabled(struct mlxsw_sp_acl_block *block); +bool mlxsw_sp_acl_block_disabled(const struct mlxsw_sp_acl_block *block); struct mlxsw_sp_acl_block *mlxsw_sp_acl_block_create(struct mlxsw_sp *mlxsw_sp, struct net *net); void mlxsw_sp_acl_block_destroy(struct mlxsw_sp_acl_block *block); @@ -694,7 +695,9 @@ int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct mlxsw_sp_port *mlxsw_sp_port, bool ingress); -bool mlxsw_sp_acl_block_is_egress_bound(struct mlxsw_sp_acl_block *block); +bool 
mlxsw_sp_acl_block_is_egress_bound(const struct mlxsw_sp_acl_block *block); +bool mlxsw_sp_acl_block_is_ingress_bound(const struct mlxsw_sp_acl_block *block); +bool mlxsw_sp_acl_block_is_mixed_bound(const struct mlxsw_sp_acl_block *block); struct mlxsw_sp_acl_ruleset * mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, u32 chain_index, @@ -726,7 +729,10 @@ int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, u16 group_id); int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei); -int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei, + bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack); int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, @@ -741,6 +747,14 @@ int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, u32 action, u16 vid, u16 proto, u8 prio, struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 prio, struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_mangle(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + enum flow_action_mangle_base htype, + u32 offset, u32 mask, u32 val, + struct netlink_ext_ack *extack); int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct netlink_ext_ack *extack); @@ -773,10 +787,17 @@ struct mlxsw_sp_acl_rule_info * mlxsw_sp_acl_rule_rulei(struct mlxsw_sp_acl_rule *rule); int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule *rule, - u64 *packets, u64 *bytes, u64 *last_use); + u64 *packets, u64 *bytes, 
u64 *last_use, + enum flow_action_hw_stats *used_hw_stats); struct mlxsw_sp_fid *mlxsw_sp_acl_dummy_fid(struct mlxsw_sp *mlxsw_sp); +static inline const struct flow_action_cookie * +mlxsw_sp_acl_act_cookie_lookup(struct mlxsw_sp *mlxsw_sp, u32 cookie_index) +{ + return mlxsw_afa_cookie_lookup(mlxsw_sp->afa, cookie_index); +} + int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp); u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp); @@ -864,6 +885,8 @@ int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_ets_qopt_offload *p); int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_tbf_qopt_offload *p); +int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_fifo_qopt_offload *p); /* spectrum_fid.c */ bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index); @@ -974,9 +997,6 @@ void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, enum mlxsw_sp_l3proto proto, union mlxsw_sp_l3addr *addr); -u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp); -bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp, - u32 tb_id, __be32 addr); int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, struct mlxsw_sp_nve_params *params, struct netlink_ext_ack *extack); @@ -1003,6 +1023,22 @@ int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core, enum devlink_trap_action action); int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, const struct devlink_trap_group *group); +int mlxsw_sp_trap_group_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group, + const struct devlink_trap_policer *policer); +int +mlxsw_sp_trap_policer_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer); +void mlxsw_sp_trap_policer_fini(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer); +int 
+mlxsw_sp_trap_policer_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 rate, u64 burst, struct netlink_ext_ack *extack); +int +mlxsw_sp_trap_policer_counter_get(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 *p_drops); static inline struct net *mlxsw_sp_net(struct mlxsw_sp *mlxsw_sp) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c index 09ee0a807747..a9fff8adc75e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c @@ -60,7 +60,7 @@ static const struct mlxsw_sp1_kvdl_part_info mlxsw_sp1_kvdl_parts_info[] = { struct mlxsw_sp1_kvdl_part { struct mlxsw_sp1_kvdl_part_info info; - unsigned long usage[0]; /* Entries */ + unsigned long usage[]; /* Entries */ }; struct mlxsw_sp1_kvdl { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c index 8d14770766b4..3a73d654017f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c @@ -45,7 +45,7 @@ struct mlxsw_sp2_kvdl_part { unsigned int usage_bit_count; unsigned int indexes_per_usage_bit; unsigned int last_allocated_bit; - unsigned long usage[0]; /* Usage bits */ + unsigned long usage[]; /* Usage bits */ }; struct mlxsw_sp2_kvdl { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 3d3cca596116..67ee880a8727 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -58,7 +58,7 @@ struct mlxsw_sp_acl_ruleset { struct mlxsw_sp_acl_ruleset_ht_key ht_key; struct rhashtable rule_ht; unsigned int ref_count; - unsigned long priv[0]; + unsigned long priv[]; /* priv has to be always the last item */ }; @@ -71,7 +71,7 @@ struct mlxsw_sp_acl_rule { 
u64 last_used; u64 last_packets; u64 last_bytes; - unsigned long priv[0]; + unsigned long priv[]; /* priv has to be always the last item */ }; @@ -99,7 +99,8 @@ struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block) return block->mlxsw_sp; } -unsigned int mlxsw_sp_acl_block_rule_count(struct mlxsw_sp_acl_block *block) +unsigned int +mlxsw_sp_acl_block_rule_count(const struct mlxsw_sp_acl_block *block) { return block ? block->rule_count : 0; } @@ -116,20 +117,24 @@ void mlxsw_sp_acl_block_disable_dec(struct mlxsw_sp_acl_block *block) block->disable_count--; } -bool mlxsw_sp_acl_block_disabled(struct mlxsw_sp_acl_block *block) +bool mlxsw_sp_acl_block_disabled(const struct mlxsw_sp_acl_block *block) { return block->disable_count; } -bool mlxsw_sp_acl_block_is_egress_bound(struct mlxsw_sp_acl_block *block) +bool mlxsw_sp_acl_block_is_egress_bound(const struct mlxsw_sp_acl_block *block) { - struct mlxsw_sp_acl_block_binding *binding; + return block->egress_binding_count; +} - list_for_each_entry(binding, &block->binding_list, list) { - if (!binding->ingress) - return true; - } - return false; +bool mlxsw_sp_acl_block_is_ingress_bound(const struct mlxsw_sp_acl_block *block) +{ + return block->ingress_binding_count; +} + +bool mlxsw_sp_acl_block_is_mixed_bound(const struct mlxsw_sp_acl_block *block) +{ + return block->ingress_binding_count && block->egress_binding_count; } static bool @@ -163,7 +168,8 @@ mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, binding->mlxsw_sp_port, binding->ingress); } -static bool mlxsw_sp_acl_ruleset_block_bound(struct mlxsw_sp_acl_block *block) +static bool +mlxsw_sp_acl_ruleset_block_bound(const struct mlxsw_sp_acl_block *block) { return block->ruleset_zero; } @@ -250,6 +256,11 @@ int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp, if (WARN_ON(mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress))) return -EEXIST; + if (ingress && block->ingress_blocker_rule_count) { + NL_SET_ERR_MSG_MOD(extack, "Block 
cannot be bound to ingress because it contains unsupported rules"); + return -EOPNOTSUPP; + } + if (!ingress && block->egress_blocker_rule_count) { NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to egress because it contains unsupported rules"); return -EOPNOTSUPP; @@ -267,6 +278,10 @@ int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp, goto err_ruleset_bind; } + if (ingress) + block->ingress_binding_count++; + else + block->egress_binding_count++; list_add(&binding->list, &block->binding_list); return 0; @@ -288,6 +303,11 @@ int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp, list_del(&binding->list); + if (ingress) + block->ingress_binding_count--; + else + block->egress_binding_count--; + if (mlxsw_sp_acl_ruleset_block_bound(block)) mlxsw_sp_acl_ruleset_unbind(mlxsw_sp, block, binding); @@ -515,9 +535,13 @@ int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei) return mlxsw_afa_block_terminate(rulei->act_block); } -int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei) +int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei, + bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack) { - return mlxsw_afa_block_append_drop(rulei->act_block); + return mlxsw_afa_block_append_drop(rulei->act_block, ingress, + fa_cookie, extack); } int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei) @@ -614,12 +638,126 @@ int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, } } +int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 prio, struct netlink_ext_ack *extack) +{ + /* Even though both Linux and Spectrum switches support 16 priorities, + * spectrum_qdisc only processes the first eight priomap elements, and + * the DCB and PFC features are tied to 8 priorities as well. Therefore + * bounce attempts to prioritize packets to higher priorities. 
+ */ + if (prio >= IEEE_8021QAZ_MAX_TCS) { + NL_SET_ERR_MSG_MOD(extack, "Only priorities 0..7 are supported"); + return -EINVAL; + } + return mlxsw_afa_block_append_qos_switch_prio(rulei->act_block, prio, + extack); +} + +enum mlxsw_sp_acl_mangle_field { + MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD, + MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP, + MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN, +}; + +struct mlxsw_sp_acl_mangle_action { + enum flow_action_mangle_base htype; + /* Offset is u32-aligned. */ + u32 offset; + /* Mask bits are unset for the modified field. */ + u32 mask; + /* Shift required to extract the set value. */ + u32 shift; + enum mlxsw_sp_acl_mangle_field field; +}; + +#define MLXSW_SP_ACL_MANGLE_ACTION(_htype, _offset, _mask, _shift, _field) \ + { \ + .htype = _htype, \ + .offset = _offset, \ + .mask = _mask, \ + .shift = _shift, \ + .field = MLXSW_SP_ACL_MANGLE_FIELD_##_field, \ + } + +#define MLXSW_SP_ACL_MANGLE_ACTION_IP4(_offset, _mask, _shift, _field) \ + MLXSW_SP_ACL_MANGLE_ACTION(FLOW_ACT_MANGLE_HDR_TYPE_IP4, \ + _offset, _mask, _shift, _field) + +#define MLXSW_SP_ACL_MANGLE_ACTION_IP6(_offset, _mask, _shift, _field) \ + MLXSW_SP_ACL_MANGLE_ACTION(FLOW_ACT_MANGLE_HDR_TYPE_IP6, \ + _offset, _mask, _shift, _field) + +static struct mlxsw_sp_acl_mangle_action mlxsw_sp_acl_mangle_actions[] = { + MLXSW_SP_ACL_MANGLE_ACTION_IP4(0, 0xff00ffff, 16, IP_DSFIELD), + MLXSW_SP_ACL_MANGLE_ACTION_IP4(0, 0xff03ffff, 18, IP_DSCP), + MLXSW_SP_ACL_MANGLE_ACTION_IP4(0, 0xfffcffff, 16, IP_ECN), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(0, 0xf00fffff, 20, IP_DSFIELD), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(0, 0xf03fffff, 22, IP_DSCP), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(0, 0xffcfffff, 20, IP_ECN), +}; + +static int +mlxsw_sp_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_acl_mangle_action *mact, + u32 val, struct netlink_ext_ack *extack) +{ + switch (mact->field) { + case MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD: + return 
mlxsw_afa_block_append_qos_dsfield(rulei->act_block, + val, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP: + return mlxsw_afa_block_append_qos_dscp(rulei->act_block, + val, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN: + return mlxsw_afa_block_append_qos_ecn(rulei->act_block, + val, extack); + } + + /* We shouldn't have gotten a match in the first place! */ + WARN_ONCE(1, "Unhandled mangle field"); + return -EINVAL; +} + +int mlxsw_sp_acl_rulei_act_mangle(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + enum flow_action_mangle_base htype, + u32 offset, u32 mask, u32 val, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_acl_mangle_action *mact; + size_t i; + + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_acl_mangle_actions); ++i) { + mact = &mlxsw_sp_acl_mangle_actions[i]; + if (mact->htype == htype && + mact->offset == offset && + mact->mask == mask) { + val >>= mact->shift; + return mlxsw_sp_acl_rulei_act_mangle_field(mlxsw_sp, + rulei, mact, + val, extack); + } + } + + NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field"); + return -EINVAL; +} + int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct netlink_ext_ack *extack) { - return mlxsw_afa_block_append_counter(rulei->act_block, - &rulei->counter_index, extack); + int err; + + err = mlxsw_afa_block_append_counter(rulei->act_block, + &rulei->counter_index, extack); + if (err) + return err; + rulei->counter_valid = true; + return 0; } int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp, @@ -707,6 +845,7 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, list_add_tail(&rule->list, &mlxsw_sp->acl->rules); mutex_unlock(&mlxsw_sp->acl->rules_lock); block->rule_count++; + block->ingress_blocker_rule_count += rule->rulei->ingress_bind_blocker; block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker; return 0; @@ -726,6 +865,7 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block = 
ruleset->ht_key.block; block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker; + block->ingress_blocker_rule_count -= rule->rulei->ingress_bind_blocker; ruleset->ht_key.block->rule_count--; mutex_lock(&mlxsw_sp->acl->rules_lock); list_del(&rule->list); @@ -827,20 +967,24 @@ static void mlxsw_sp_acl_rule_activity_update_work(struct work_struct *work) int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule *rule, - u64 *packets, u64 *bytes, u64 *last_use) + u64 *packets, u64 *bytes, u64 *last_use, + enum flow_action_hw_stats *used_hw_stats) { struct mlxsw_sp_acl_rule_info *rulei; - u64 current_packets; - u64 current_bytes; + u64 current_packets = 0; + u64 current_bytes = 0; int err; rulei = mlxsw_sp_acl_rule_rulei(rule); - err = mlxsw_sp_flow_counter_get(mlxsw_sp, rulei->counter_index, - &current_packets, &current_bytes); - if (err) - return err; - + if (rulei->counter_valid) { + err = mlxsw_sp_flow_counter_get(mlxsw_sp, rulei->counter_index, + &current_packets, + &current_bytes); + if (err) + return err; + *used_hw_stats = FLOW_ACTION_HW_STATS_IMMEDIATE; + } *packets = current_packets - rule->last_packets; *bytes = current_bytes - rule->last_bytes; *last_use = rule->last_used; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c index 3a2de13fcb68..dbd3bebf11ec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c @@ -13,7 +13,7 @@ struct mlxsw_sp_acl_bf { struct mutex lock; /* Protects Bloom Filter updates. 
*/ unsigned int bank_size; - refcount_t refcnt[0]; + refcount_t refcnt[]; }; /* Bloom filter uses a crc-16 hash over chunks of data which contain 4 key diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index e993159e8e4c..430da69003d8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -224,7 +224,7 @@ struct mlxsw_sp_acl_tcam_vchunk; struct mlxsw_sp_acl_tcam_chunk { struct mlxsw_sp_acl_tcam_vchunk *vchunk; struct mlxsw_sp_acl_tcam_region *region; - unsigned long priv[0]; + unsigned long priv[]; /* priv has to be always the last item */ }; @@ -243,7 +243,7 @@ struct mlxsw_sp_acl_tcam_vchunk { struct mlxsw_sp_acl_tcam_entry { struct mlxsw_sp_acl_tcam_ventry *ventry; struct mlxsw_sp_acl_tcam_chunk *chunk; - unsigned long priv[0]; + unsigned long priv[]; /* priv has to be always the last item */ }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index 5965913565a5..96437992b102 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -20,7 +20,7 @@ struct mlxsw_sp_acl_tcam { struct mutex lock; /* guards vregion list */ struct list_head vregion_list; u32 vregion_rehash_intrvl; /* ms */ - unsigned long priv[0]; + unsigned long priv[]; /* priv has to be always the last item */ }; @@ -86,7 +86,7 @@ struct mlxsw_sp_acl_tcam_region { char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN]; struct mlxsw_afk_key_info *key_info; struct mlxsw_sp *mlxsw_sp; - unsigned long priv[0]; + unsigned long priv[]; /* priv has to be always the last item */ }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c index 83c2e1e5f216..7974982533b5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c @@ -3,92 +3,147 @@ #include <linux/kernel.h> #include <linux/bitops.h> +#include <linux/spinlock.h> #include "spectrum_cnt.h" -#define MLXSW_SP_COUNTER_POOL_BANK_SIZE 4096 - struct mlxsw_sp_counter_sub_pool { + u64 size; unsigned int base_index; - unsigned int size; + enum mlxsw_res_id entry_size_res_id; + const char *resource_name; /* devlink resource name */ + u64 resource_id; /* devlink resource id */ unsigned int entry_size; unsigned int bank_count; + atomic_t active_entries_count; }; struct mlxsw_sp_counter_pool { - unsigned int pool_size; + u64 pool_size; unsigned long *usage; /* Usage bitmap */ - struct mlxsw_sp_counter_sub_pool *sub_pools; + spinlock_t counter_pool_lock; /* Protects counter pool allocations */ + atomic_t active_entries_count; + unsigned int sub_pools_count; + struct mlxsw_sp_counter_sub_pool sub_pools[]; }; -static struct mlxsw_sp_counter_sub_pool mlxsw_sp_counter_sub_pools[] = { +static const struct mlxsw_sp_counter_sub_pool mlxsw_sp_counter_sub_pools[] = { [MLXSW_SP_COUNTER_SUB_POOL_FLOW] = { + .entry_size_res_id = MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES, + .resource_name = MLXSW_SP_RESOURCE_NAME_COUNTERS_FLOW, + .resource_id = MLXSW_SP_RESOURCE_COUNTERS_FLOW, .bank_count = 6, }, [MLXSW_SP_COUNTER_SUB_POOL_RIF] = { + .entry_size_res_id = MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC, + .resource_name = MLXSW_SP_RESOURCE_NAME_COUNTERS_RIF, + .resource_id = MLXSW_SP_RESOURCE_COUNTERS_RIF, .bank_count = 2, } }; -static int mlxsw_sp_counter_pool_validate(struct mlxsw_sp *mlxsw_sp) +static u64 mlxsw_sp_counter_sub_pool_occ_get(void *priv) +{ + const struct mlxsw_sp_counter_sub_pool *sub_pool = priv; + + return atomic_read(&sub_pool->active_entries_count); +} + +static int mlxsw_sp_counter_sub_pools_init(struct mlxsw_sp *mlxsw_sp) { - unsigned int total_bank_config = 0; - unsigned int pool_size; + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = 
priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_counter_sub_pool *sub_pool; + unsigned int base_index = 0; + enum mlxsw_res_id res_id; + int err; int i; - pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE); - /* Check config is valid, no bank over subscription */ - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) - total_bank_config += mlxsw_sp_counter_sub_pools[i].bank_count; - if (total_bank_config > pool_size / MLXSW_SP_COUNTER_POOL_BANK_SIZE + 1) - return -EINVAL; + for (i = 0; i < pool->sub_pools_count; i++) { + sub_pool = &pool->sub_pools[i]; + res_id = sub_pool->entry_size_res_id; + + if (!mlxsw_core_res_valid(mlxsw_sp->core, res_id)) + return -EIO; + sub_pool->entry_size = mlxsw_core_res_get(mlxsw_sp->core, + res_id); + err = devlink_resource_size_get(devlink, + sub_pool->resource_id, + &sub_pool->size); + if (err) + goto err_resource_size_get; + + devlink_resource_occ_get_register(devlink, + sub_pool->resource_id, + mlxsw_sp_counter_sub_pool_occ_get, + sub_pool); + + sub_pool->base_index = base_index; + base_index += sub_pool->size; + atomic_set(&sub_pool->active_entries_count, 0); + } return 0; + +err_resource_size_get: + for (i--; i >= 0; i--) { + sub_pool = &pool->sub_pools[i]; + + devlink_resource_occ_get_unregister(devlink, + sub_pool->resource_id); + } + return err; } -static int mlxsw_sp_counter_sub_pools_prepare(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_counter_sub_pools_fini(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); struct mlxsw_sp_counter_sub_pool *sub_pool; + int i; - /* Prepare generic flow pool*/ - sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_FLOW]; - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_PACKETS_BYTES)) - return -EIO; - sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, - COUNTER_SIZE_PACKETS_BYTES); - /* Prepare erif pool*/ - sub_pool = 
&mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_RIF]; - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_ROUTER_BASIC)) - return -EIO; - sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, - COUNTER_SIZE_ROUTER_BASIC); - return 0; + for (i = 0; i < pool->sub_pools_count; i++) { + sub_pool = &pool->sub_pools[i]; + + WARN_ON(atomic_read(&sub_pool->active_entries_count)); + devlink_resource_occ_get_unregister(devlink, + sub_pool->resource_id); + } +} + +static u64 mlxsw_sp_counter_pool_occ_get(void *priv) +{ + const struct mlxsw_sp_counter_pool *pool = priv; + + return atomic_read(&pool->active_entries_count); } int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp) { + unsigned int sub_pools_count = ARRAY_SIZE(mlxsw_sp_counter_sub_pools); + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); struct mlxsw_sp_counter_sub_pool *sub_pool; struct mlxsw_sp_counter_pool *pool; - unsigned int base_index; unsigned int map_size; - int i; int err; - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_POOL_SIZE)) - return -EIO; - - err = mlxsw_sp_counter_pool_validate(mlxsw_sp); - if (err) - return err; - - err = mlxsw_sp_counter_sub_pools_prepare(mlxsw_sp); - if (err) - return err; - - pool = kzalloc(sizeof(*pool), GFP_KERNEL); + pool = kzalloc(struct_size(pool, sub_pools, sub_pools_count), + GFP_KERNEL); if (!pool) return -ENOMEM; + mlxsw_sp->counter_pool = pool; + memcpy(pool->sub_pools, mlxsw_sp_counter_sub_pools, + sub_pools_count * sizeof(*sub_pool)); + pool->sub_pools_count = sub_pools_count; + spin_lock_init(&pool->counter_pool_lock); + atomic_set(&pool->active_entries_count, 0); + + err = devlink_resource_size_get(devlink, MLXSW_SP_RESOURCE_COUNTERS, + &pool->pool_size); + if (err) + goto err_pool_resource_size_get; + devlink_resource_occ_get_register(devlink, MLXSW_SP_RESOURCE_COUNTERS, + mlxsw_sp_counter_pool_occ_get, pool); - pool->pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE); map_size = BITS_TO_LONGS(pool->pool_size) * 
sizeof(unsigned long); pool->usage = kzalloc(map_size, GFP_KERNEL); @@ -97,26 +152,18 @@ int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp) goto err_usage_alloc; } - pool->sub_pools = mlxsw_sp_counter_sub_pools; - /* Allocation is based on bank count which should be - * specified for each sub pool statically. - */ - base_index = 0; - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) { - sub_pool = &pool->sub_pools[i]; - sub_pool->size = sub_pool->bank_count * - MLXSW_SP_COUNTER_POOL_BANK_SIZE; - sub_pool->base_index = base_index; - base_index += sub_pool->size; - /* The last bank can't be fully used */ - if (sub_pool->base_index + sub_pool->size > pool->pool_size) - sub_pool->size = pool->pool_size - sub_pool->base_index; - } + err = mlxsw_sp_counter_sub_pools_init(mlxsw_sp); + if (err) + goto err_sub_pools_init; - mlxsw_sp->counter_pool = pool; return 0; +err_sub_pools_init: + kfree(pool->usage); err_usage_alloc: + devlink_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_COUNTERS); +err_pool_resource_size_get: kfree(pool); return err; } @@ -124,10 +171,15 @@ err_usage_alloc: void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + mlxsw_sp_counter_sub_pools_fini(mlxsw_sp); WARN_ON(find_first_bit(pool->usage, pool->pool_size) != pool->pool_size); + WARN_ON(atomic_read(&pool->active_entries_count)); kfree(pool->usage); + devlink_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_COUNTERS); kfree(pool); } @@ -139,25 +191,37 @@ int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_counter_sub_pool *sub_pool; unsigned int entry_index; unsigned int stop_index; - int i; + int i, err; - sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id]; + sub_pool = &pool->sub_pools[sub_pool_id]; stop_index = sub_pool->base_index + sub_pool->size; entry_index = sub_pool->base_index; + 
spin_lock(&pool->counter_pool_lock); entry_index = find_next_zero_bit(pool->usage, stop_index, entry_index); - if (entry_index == stop_index) - return -ENOBUFS; + if (entry_index == stop_index) { + err = -ENOBUFS; + goto err_alloc; + } /* The sub-pools can contain non-integer number of entries * so we must check for overflow */ - if (entry_index + sub_pool->entry_size > stop_index) - return -ENOBUFS; + if (entry_index + sub_pool->entry_size > stop_index) { + err = -ENOBUFS; + goto err_alloc; + } for (i = 0; i < sub_pool->entry_size; i++) __set_bit(entry_index + i, pool->usage); + spin_unlock(&pool->counter_pool_lock); *p_counter_index = entry_index; + atomic_add(sub_pool->entry_size, &sub_pool->active_entries_count); + atomic_add(sub_pool->entry_size, &pool->active_entries_count); return 0; + +err_alloc: + spin_unlock(&pool->counter_pool_lock); + return err; } void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, @@ -170,7 +234,77 @@ void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, if (WARN_ON(counter_index >= pool->pool_size)) return; - sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id]; + sub_pool = &pool->sub_pools[sub_pool_id]; + spin_lock(&pool->counter_pool_lock); for (i = 0; i < sub_pool->entry_size; i++) __clear_bit(counter_index + i, pool->usage); + spin_unlock(&pool->counter_pool_lock); + atomic_sub(sub_pool->entry_size, &sub_pool->active_entries_count); + atomic_sub(sub_pool->entry_size, &pool->active_entries_count); +} + +int mlxsw_sp_counter_resources_register(struct mlxsw_core *mlxsw_core) +{ + static struct devlink_resource_size_params size_params; + struct devlink *devlink = priv_to_devlink(mlxsw_core); + const struct mlxsw_sp_counter_sub_pool *sub_pool; + unsigned int total_bank_config; + u64 sub_pool_size; + u64 base_index; + u64 pool_size; + u64 bank_size; + int err; + int i; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, COUNTER_POOL_SIZE) || + !MLXSW_CORE_RES_VALID(mlxsw_core, COUNTER_BANK_SIZE)) + return -EIO; + + pool_size = 
MLXSW_CORE_RES_GET(mlxsw_core, COUNTER_POOL_SIZE); + bank_size = MLXSW_CORE_RES_GET(mlxsw_core, COUNTER_BANK_SIZE); + + devlink_resource_size_params_init(&size_params, pool_size, + pool_size, bank_size, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devlink_resource_register(devlink, + MLXSW_SP_RESOURCE_NAME_COUNTERS, + pool_size, + MLXSW_SP_RESOURCE_COUNTERS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_params); + if (err) + return err; + + /* Allocation is based on bank count which should be + * specified for each sub pool statically. + */ + total_bank_config = 0; + base_index = 0; + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) { + sub_pool = &mlxsw_sp_counter_sub_pools[i]; + sub_pool_size = sub_pool->bank_count * bank_size; + /* The last bank can't be fully used */ + if (base_index + sub_pool_size > pool_size) + sub_pool_size = pool_size - base_index; + base_index += sub_pool_size; + + devlink_resource_size_params_init(&size_params, sub_pool_size, + sub_pool_size, bank_size, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devlink_resource_register(devlink, + sub_pool->resource_name, + sub_pool_size, + sub_pool->resource_id, + MLXSW_SP_RESOURCE_COUNTERS, + &size_params); + if (err) + return err; + total_bank_config += sub_pool->bank_count; + } + + /* Check config is valid, no bank over subscription */ + if (WARN_ON(total_bank_config > div64_u64(pool_size, bank_size) + 1)) + return -EINVAL; + + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h index 81465e267b10..a68d931090dd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h @@ -4,6 +4,7 @@ #ifndef _MLXSW_SPECTRUM_CNT_H #define _MLXSW_SPECTRUM_CNT_H +#include "core.h" #include "spectrum.h" enum mlxsw_sp_counter_sub_pool_id { @@ -19,5 +20,6 @@ void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index); int mlxsw_sp_counter_pool_init(struct 
mlxsw_sp *mlxsw_sp); void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_counter_resources_register(struct mlxsw_core *mlxsw_core); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 2dc0978428e6..daf029931b5f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -2,6 +2,7 @@ /* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ #include <linux/kernel.h> +#include <linux/mutex.h> #include <net/devlink.h> #include "spectrum.h" @@ -210,7 +211,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled, return err; rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); i = 0; start_again: err = devlink_dpipe_entry_ctx_prepare(dump_ctx); @@ -241,14 +242,14 @@ start_again: devlink_dpipe_entry_ctx_close(dump_ctx); if (i != rif_count) goto start_again; - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); devlink_dpipe_entry_clear(&entry); return 0; err_entry_append: err_entry_get: err_ctx_prepare: - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); devlink_dpipe_entry_clear(&entry); return err; } @@ -258,7 +259,7 @@ static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable) struct mlxsw_sp *mlxsw_sp = priv; int i; - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); @@ -271,7 +272,7 @@ static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable) mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); } - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return 0; } @@ -546,7 +547,7 @@ mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp, int i, j; int err; - rtnl_lock(); + 
mutex_lock(&mlxsw_sp->router->lock); i = 0; rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); start_again: @@ -602,12 +603,12 @@ out: if (i != rif_count) goto start_again; - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return 0; err_ctx_prepare: err_entry_append: - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return err; } @@ -662,7 +663,7 @@ mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp, { int i; - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); struct mlxsw_sp_neigh_entry *neigh_entry; @@ -684,7 +685,7 @@ mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp, enable); } } - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); } static int mlxsw_sp_dpipe_table_host4_counters_update(void *priv, bool enable) @@ -701,7 +702,7 @@ mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type) u64 size = 0; int i; - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); struct mlxsw_sp_neigh_entry *neigh_entry; @@ -721,7 +722,7 @@ mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type) size++; } } - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return size; } @@ -1093,7 +1094,7 @@ mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp, int j; int err; - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); nh_count_max = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp); start_again: err = devlink_dpipe_entry_ctx_prepare(dump_ctx); @@ -1130,13 +1131,13 @@ skip: devlink_dpipe_entry_ctx_close(dump_ctx); if (nh_count != nh_count_max) goto start_again; - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return 0; err_ctx_prepare: err_entry_append: - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); 
return err; } @@ -1206,9 +1207,9 @@ mlxsw_sp_dpipe_table_adj_size_get(void *priv) struct mlxsw_sp *mlxsw_sp = priv; u64 size; - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); size = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp); - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return size; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 8df3cb21baa6..004c42274e48 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -8,6 +8,7 @@ #include <linux/netdevice.h> #include <linux/rhashtable.h> #include <linux/rtnetlink.h> +#include <linux/refcount.h> #include "spectrum.h" #include "reg.h" @@ -24,7 +25,7 @@ struct mlxsw_sp_fid_core { struct mlxsw_sp_fid { struct list_head list; struct mlxsw_sp_rif *rif; - unsigned int ref_count; + refcount_t ref_count; u16 fid_index; struct mlxsw_sp_fid_family *fid_family; struct rhash_head ht_node; @@ -149,7 +150,7 @@ struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp, fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->fid_ht, &fid_index, mlxsw_sp_fid_ht_params); if (fid) - fid->ref_count++; + refcount_inc(&fid->ref_count); return fid; } @@ -183,7 +184,7 @@ struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp, fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->vni_ht, &vni, mlxsw_sp_fid_vni_ht_params); if (fid) - fid->ref_count++; + refcount_inc(&fid->ref_count); return fid; } @@ -437,16 +438,6 @@ static int mlxsw_sp_fid_vni_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); } -static int mlxsw_sp_fid_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index, - u16 vid, bool valid) -{ - enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_VID_TO_FID; - char svfa_pl[MLXSW_REG_SVFA_LEN]; - - mlxsw_reg_svfa_pack(svfa_pl, 0, mt, valid, fid_index, vid); - return mlxsw_reg_write(mlxsw_sp->core, 
MLXSW_REG(svfa), svfa_pl); -} - static int __mlxsw_sp_fid_port_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index, u8 local_port, u16 vid, bool valid) { @@ -457,140 +448,6 @@ static int __mlxsw_sp_fid_port_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); } -static int mlxsw_sp_fid_8021q_configure(struct mlxsw_sp_fid *fid) -{ - struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; - struct mlxsw_sp_fid_8021q *fid_8021q; - int err; - - err = mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, fid->fid_index, true); - if (err) - return err; - - fid_8021q = mlxsw_sp_fid_8021q_fid(fid); - err = mlxsw_sp_fid_vid_map(mlxsw_sp, fid->fid_index, fid_8021q->vid, - true); - if (err) - goto err_fid_map; - - return 0; - -err_fid_map: - mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, 0, false); - return err; -} - -static void mlxsw_sp_fid_8021q_deconfigure(struct mlxsw_sp_fid *fid) -{ - struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; - struct mlxsw_sp_fid_8021q *fid_8021q; - - fid_8021q = mlxsw_sp_fid_8021q_fid(fid); - mlxsw_sp_fid_vid_map(mlxsw_sp, fid->fid_index, fid_8021q->vid, false); - mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, 0, false); -} - -static int mlxsw_sp_fid_8021q_index_alloc(struct mlxsw_sp_fid *fid, - const void *arg, u16 *p_fid_index) -{ - struct mlxsw_sp_fid_family *fid_family = fid->fid_family; - u16 vid = *(u16 *) arg; - - /* Use 1:1 mapping for simplicity although not a must */ - if (vid < fid_family->start_index || vid > fid_family->end_index) - return -EINVAL; - *p_fid_index = vid; - - return 0; -} - -static bool -mlxsw_sp_fid_8021q_compare(const struct mlxsw_sp_fid *fid, const void *arg) -{ - u16 vid = *(u16 *) arg; - - return mlxsw_sp_fid_8021q_fid(fid)->vid == vid; -} - -static u16 mlxsw_sp_fid_8021q_flood_index(const struct mlxsw_sp_fid *fid) -{ - return fid->fid_index; -} - -static int mlxsw_sp_fid_8021q_port_vid_map(struct mlxsw_sp_fid *fid, - struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid) -{ - 
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u8 local_port = mlxsw_sp_port->local_port; - - /* In case there are no {Port, VID} => FID mappings on the port, - * we can use the global VID => FID mapping we created when the - * FID was configured. - */ - if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 0) - return 0; - return __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, local_port, - vid, true); -} - -static void -mlxsw_sp_fid_8021q_port_vid_unmap(struct mlxsw_sp_fid *fid, - struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u8 local_port = mlxsw_sp_port->local_port; - - if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 0) - return; - __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, local_port, vid, - false); -} - -static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021q_ops = { - .setup = mlxsw_sp_fid_8021q_setup, - .configure = mlxsw_sp_fid_8021q_configure, - .deconfigure = mlxsw_sp_fid_8021q_deconfigure, - .index_alloc = mlxsw_sp_fid_8021q_index_alloc, - .compare = mlxsw_sp_fid_8021q_compare, - .flood_index = mlxsw_sp_fid_8021q_flood_index, - .port_vid_map = mlxsw_sp_fid_8021q_port_vid_map, - .port_vid_unmap = mlxsw_sp_fid_8021q_port_vid_unmap, -}; - -static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021q_flood_tables[] = { - { - .packet_type = MLXSW_SP_FLOOD_TYPE_UC, - .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID, - .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET, - .table_index = 0, - }, - { - .packet_type = MLXSW_SP_FLOOD_TYPE_MC, - .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID, - .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET, - .table_index = 1, - }, - { - .packet_type = MLXSW_SP_FLOOD_TYPE_BC, - .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID, - .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET, - .table_index = 2, - }, -}; - -/* Range and flood configuration must match mlxsw_config_profile */ -static const struct mlxsw_sp_fid_family 
mlxsw_sp_fid_8021q_family = { - .type = MLXSW_SP_FID_TYPE_8021Q, - .fid_size = sizeof(struct mlxsw_sp_fid_8021q), - .start_index = 1, - .end_index = VLAN_VID_MASK, - .flood_tables = mlxsw_sp_fid_8021q_flood_tables, - .nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021q_flood_tables), - .rif_type = MLXSW_SP_RIF_TYPE_VLAN, - .ops = &mlxsw_sp_fid_8021q_ops, -}; - static struct mlxsw_sp_fid_8021d * mlxsw_sp_fid_8021d_fid(const struct mlxsw_sp_fid *fid) { @@ -845,6 +702,14 @@ static const struct mlxsw_sp_fid_family mlxsw_sp_fid_8021d_family = { .lag_vid_valid = 1, }; +static bool +mlxsw_sp_fid_8021q_compare(const struct mlxsw_sp_fid *fid, const void *arg) +{ + u16 vid = *(u16 *) arg; + + return mlxsw_sp_fid_8021q_fid(fid)->vid == vid; +} + static void mlxsw_sp_fid_8021q_fdb_clear_offload(const struct mlxsw_sp_fid *fid, const struct net_device *nve_dev) @@ -1030,7 +895,7 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_lookup(struct mlxsw_sp *mlxsw_sp, list_for_each_entry(fid, &fid_family->fids_list, list) { if (!fid->fid_family->ops->compare(fid, arg)) continue; - fid->ref_count++; + refcount_inc(&fid->ref_count); return fid; } @@ -1075,7 +940,7 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, goto err_rhashtable_insert; list_add(&fid->list, &fid_family->fids_list); - fid->ref_count++; + refcount_set(&fid->ref_count, 1); return fid; err_rhashtable_insert: @@ -1093,7 +958,7 @@ void mlxsw_sp_fid_put(struct mlxsw_sp_fid *fid) struct mlxsw_sp_fid_family *fid_family = fid->fid_family; struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; - if (--fid->ref_count != 0) + if (!refcount_dec_and_test(&fid->ref_count)) return; list_del(&fid->list); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index b607919c8ad0..2f76908cae73 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -26,11 +26,20 @@ static int 
mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, if (!flow_action_has_entries(flow_action)) return 0; + if (!flow_action_mixed_hw_stats_check(flow_action, extack)) + return -EOPNOTSUPP; - /* Count action is inserted first */ - err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei, extack); - if (err) - return err; + act = flow_action_first_entry_get(flow_action); + if (act->hw_stats == FLOW_ACTION_HW_STATS_ANY || + act->hw_stats == FLOW_ACTION_HW_STATS_IMMEDIATE) { + /* Count action is inserted first */ + err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei, extack); + if (err) + return err; + } else if (act->hw_stats != FLOW_ACTION_HW_STATS_DISABLED) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported action HW stats type"); + return -EOPNOTSUPP; + } flow_action_for_each(i, act, flow_action) { switch (act->id) { @@ -41,12 +50,30 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return err; } break; - case FLOW_ACTION_DROP: - err = mlxsw_sp_acl_rulei_act_drop(rulei); + case FLOW_ACTION_DROP: { + bool ingress; + + if (mlxsw_sp_acl_block_is_mixed_bound(block)) { + NL_SET_ERR_MSG_MOD(extack, "Drop action is not supported when block is bound to ingress and egress"); + return -EOPNOTSUPP; + } + ingress = mlxsw_sp_acl_block_is_ingress_bound(block); + err = mlxsw_sp_acl_rulei_act_drop(rulei, ingress, + act->cookie, extack); if (err) { NL_SET_ERR_MSG_MOD(extack, "Cannot append drop action"); return err; } + + /* Forbid block with this rulei to be bound + * to ingress/egress in future. Ingress rule is + * a blocker for egress and vice versa. 
+ */ + if (ingress) + rulei->egress_bind_blocker = 1; + else + rulei->ingress_bind_blocker = 1; + } break; case FLOW_ACTION_TRAP: err = mlxsw_sp_acl_rulei_act_trap(rulei); @@ -127,6 +154,25 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, act->id, vid, proto, prio, extack); } + case FLOW_ACTION_PRIORITY: + return mlxsw_sp_acl_rulei_act_priority(mlxsw_sp, rulei, + act->priority, + extack); + case FLOW_ACTION_MANGLE: { + enum flow_action_mangle_base htype = act->mangle.htype; + __be32 be_mask = (__force __be32) act->mangle.mask; + __be32 be_val = (__force __be32) act->mangle.val; + u32 offset = act->mangle.offset; + u32 mask = be32_to_cpu(be_mask); + u32 val = be32_to_cpu(be_val); + + err = mlxsw_sp_acl_rulei_act_mangle(mlxsw_sp, rulei, + htype, offset, + mask, val, extack); + if (err) + return err; + break; + } default: NL_SET_ERR_MSG_MOD(extack, "Unsupported action"); dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n"); @@ -525,6 +571,7 @@ int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct flow_cls_offload *f) { + enum flow_action_hw_stats used_hw_stats = FLOW_ACTION_HW_STATS_DISABLED; struct mlxsw_sp_acl_ruleset *ruleset; struct mlxsw_sp_acl_rule *rule; u64 packets; @@ -543,11 +590,11 @@ int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp, return -EINVAL; err = mlxsw_sp_acl_rule_get_stats(mlxsw_sp, rule, &packets, &bytes, - &lastuse); + &lastuse, &used_hw_stats); if (err) goto err_rule_get_stats; - flow_stats_update(&f->stats, bytes, packets, lastuse); + flow_stats_update(&f->stats, bytes, packets, lastuse, used_hw_stats); mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c index 1e4cdee7bcd7..20d72f1c0cee 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -2,13 +2,15 @@ /* Copyright (c) 2016-2018 
Mellanox Technologies. All rights reserved */ #include <linux/kernel.h> +#include <linux/mutex.h> #include <linux/slab.h> #include "spectrum.h" struct mlxsw_sp_kvdl { const struct mlxsw_sp_kvdl_ops *kvdl_ops; - unsigned long priv[0]; + struct mutex kvdl_lock; /* Protects kvdl allocations */ + unsigned long priv[]; /* priv has to be always the last item */ }; @@ -22,6 +24,7 @@ int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp) GFP_KERNEL); if (!kvdl) return -ENOMEM; + mutex_init(&kvdl->kvdl_lock); kvdl->kvdl_ops = kvdl_ops; mlxsw_sp->kvdl = kvdl; @@ -31,6 +34,7 @@ int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp) return 0; err_init: + mutex_destroy(&kvdl->kvdl_lock); kfree(kvdl); return err; } @@ -40,6 +44,7 @@ void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp) struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; kvdl->kvdl_ops->fini(mlxsw_sp, kvdl->priv); + mutex_destroy(&kvdl->kvdl_lock); kfree(kvdl); } @@ -48,9 +53,14 @@ int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, u32 *p_entry_index) { struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; + int err; + + mutex_lock(&kvdl->kvdl_lock); + err = kvdl->kvdl_ops->alloc(mlxsw_sp, kvdl->priv, type, + entry_count, p_entry_index); + mutex_unlock(&kvdl->kvdl_lock); - return kvdl->kvdl_ops->alloc(mlxsw_sp, kvdl->priv, type, - entry_count, p_entry_index); + return err; } void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, @@ -59,8 +69,10 @@ void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; + mutex_lock(&kvdl->kvdl_lock); kvdl->kvdl_ops->free(mlxsw_sp, kvdl->priv, type, entry_count, entry_index); + mutex_unlock(&kvdl->kvdl_lock); } int mlxsw_sp_kvdl_alloc_count_query(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index 336e5ecc68f8..47eb751a2570 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -1,6 
+1,7 @@ // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 /* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ +#include <linux/mutex.h> #include <linux/rhashtable.h> #include <net/ipv6.h> @@ -12,6 +13,7 @@ struct mlxsw_sp_mr { void *catchall_route_priv; struct delayed_work stats_update_dw; struct list_head table_list; + struct mutex table_list_lock; /* Protects table_list */ #define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */ unsigned long priv[0]; /* priv has to be always the last item */ @@ -66,9 +68,10 @@ struct mlxsw_sp_mr_table { u32 vr_id; struct mlxsw_sp_mr_vif vifs[MAXVIFS]; struct list_head route_list; + struct mutex route_list_lock; /* Protects route_list */ struct rhashtable route_ht; const struct mlxsw_sp_mr_table_ops *ops; - char catchall_route_priv[0]; + char catchall_route_priv[]; /* catchall_route_priv has to be always the last item */ }; @@ -370,11 +373,13 @@ static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route) static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table, struct mlxsw_sp_mr_route *mr_route) { + WARN_ON_ONCE(!mutex_is_locked(&mr_table->route_list_lock)); + mlxsw_sp_mr_mfc_offload_set(mr_route, false); - mlxsw_sp_mr_route_erase(mr_table, mr_route); rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node, mlxsw_sp_mr_route_ht_params); list_del(&mr_route->node); + mlxsw_sp_mr_route_erase(mr_table, mr_route); mlxsw_sp_mr_route_destroy(mr_table, mr_route); } @@ -415,19 +420,21 @@ int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table, goto err_duplicate_route; } + /* Write the route to the hardware */ + err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace); + if (err) + goto err_mr_route_write; + /* Put it in the table data-structures */ + mutex_lock(&mr_table->route_list_lock); list_add_tail(&mr_route->node, &mr_table->route_list); + mutex_unlock(&mr_table->route_list_lock); err = rhashtable_insert_fast(&mr_table->route_ht, &mr_route->ht_node, 
mlxsw_sp_mr_route_ht_params); if (err) goto err_rhashtable_insert; - /* Write the route to the hardware */ - err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace); - if (err) - goto err_mr_route_write; - /* Destroy the original route */ if (replace) { rhashtable_remove_fast(&mr_table->route_ht, @@ -440,11 +447,12 @@ int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table, mlxsw_sp_mr_mfc_offload_update(mr_route); return 0; -err_mr_route_write: - rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node, - mlxsw_sp_mr_route_ht_params); err_rhashtable_insert: + mutex_lock(&mr_table->route_list_lock); list_del(&mr_route->node); + mutex_unlock(&mr_table->route_list_lock); + mlxsw_sp_mr_route_erase(mr_table, mr_route); +err_mr_route_write: err_no_orig_route: err_duplicate_route: mlxsw_sp_mr_route_destroy(mr_table, mr_route); @@ -460,8 +468,11 @@ void mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table, mr_table->ops->key_create(mr_table, &key, mfc); mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key, mlxsw_sp_mr_route_ht_params); - if (mr_route) + if (mr_route) { + mutex_lock(&mr_table->route_list_lock); __mlxsw_sp_mr_route_del(mr_table, mr_route); + mutex_unlock(&mr_table->route_list_lock); + } } /* Should be called after the VIF struct is updated */ @@ -910,6 +921,7 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp, mr_table->proto = proto; mr_table->ops = &mlxsw_sp_mr_table_ops_arr[proto]; INIT_LIST_HEAD(&mr_table->route_list); + mutex_init(&mr_table->route_list_lock); err = rhashtable_init(&mr_table->route_ht, &mlxsw_sp_mr_route_ht_params); @@ -927,12 +939,15 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp, &catchall_route_params); if (err) goto err_ops_route_create; + mutex_lock(&mr->table_list_lock); list_add_tail(&mr_table->node, &mr->table_list); + mutex_unlock(&mr->table_list_lock); return mr_table; err_ops_route_create: rhashtable_destroy(&mr_table->route_ht); 
err_route_rhashtable_init: + mutex_destroy(&mr_table->route_list_lock); kfree(mr_table); return ERR_PTR(err); } @@ -943,10 +958,13 @@ void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table) struct mlxsw_sp_mr *mr = mlxsw_sp->mr; WARN_ON(!mlxsw_sp_mr_table_empty(mr_table)); + mutex_lock(&mr->table_list_lock); list_del(&mr_table->node); + mutex_unlock(&mr->table_list_lock); mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, &mr_table->catchall_route_priv); rhashtable_destroy(&mr_table->route_ht); + mutex_destroy(&mr_table->route_list_lock); kfree(mr_table); } @@ -955,8 +973,10 @@ void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table) struct mlxsw_sp_mr_route *mr_route, *tmp; int i; + mutex_lock(&mr_table->route_list_lock); list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node) __mlxsw_sp_mr_route_del(mr_table, mr_route); + mutex_unlock(&mr_table->route_list_lock); for (i = 0; i < MAXVIFS; i++) { mr_table->vifs[i].dev = NULL; @@ -1000,12 +1020,15 @@ static void mlxsw_sp_mr_stats_update(struct work_struct *work) struct mlxsw_sp_mr_route *mr_route; unsigned long interval; - rtnl_lock(); - list_for_each_entry(mr_table, &mr->table_list, node) + mutex_lock(&mr->table_list_lock); + list_for_each_entry(mr_table, &mr->table_list, node) { + mutex_lock(&mr_table->route_list_lock); list_for_each_entry(mr_route, &mr_table->route_list, node) mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp, mr_route); - rtnl_unlock(); + mutex_unlock(&mr_table->route_list_lock); + } + mutex_unlock(&mr->table_list_lock); interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL); mlxsw_core_schedule_dw(&mr->stats_update_dw, interval); @@ -1024,6 +1047,7 @@ int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp, mr->mr_ops = mr_ops; mlxsw_sp->mr = mr; INIT_LIST_HEAD(&mr->table_list); + mutex_init(&mr->table_list_lock); err = mr_ops->init(mlxsw_sp, mr->priv); if (err) @@ -1035,6 +1059,7 @@ int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp, 
mlxsw_core_schedule_dw(&mr->stats_update_dw, interval); return 0; err: + mutex_destroy(&mr->table_list_lock); kfree(mr); return err; } @@ -1045,5 +1070,6 @@ void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp) cancel_delayed_work_sync(&mr->stats_update_dw); mr->mr_ops->fini(mlxsw_sp, mr->priv); + mutex_destroy(&mr->table_list_lock); kfree(mr); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c index 2153bcc4b585..54d3e7dcd303 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -67,7 +67,7 @@ struct mlxsw_sp_nve_mc_record { struct mlxsw_sp_nve_mc_list *mc_list; const struct mlxsw_sp_nve_mc_record_ops *ops; u32 kvdl_index; - struct mlxsw_sp_nve_mc_entry entries[0]; + struct mlxsw_sp_nve_mc_entry entries[]; }; struct mlxsw_sp_nve_mc_list { @@ -713,27 +713,6 @@ static void mlxsw_sp_nve_flood_ip_flush(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); } -u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp) -{ - WARN_ON(mlxsw_sp->nve->num_nve_tunnels == 0); - - return mlxsw_sp->nve->tunnel_index; -} - -bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp, - u32 tb_id, __be32 addr) -{ - struct mlxsw_sp_nve *nve = mlxsw_sp->nve; - struct mlxsw_sp_nve_config *config = &nve->config; - - if (nve->num_nve_tunnels && - config->ul_proto == MLXSW_SP_L3_PROTO_IPV4 && - config->ul_sip.addr4 == addr && config->ul_tb_id == tb_id) - return true; - - return false; -} - static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nve_config *config) { @@ -744,6 +723,8 @@ static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp, if (nve->num_nve_tunnels++ != 0) return 0; + nve->config = *config; + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, &nve->tunnel_index); if (err) @@ -760,6 +741,7 @@ err_ops_init: mlxsw_sp_kvdl_free(mlxsw_sp, 
MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, nve->tunnel_index); err_kvdl_alloc: + memset(&nve->config, 0, sizeof(nve->config)); nve->num_nve_tunnels--; return err; } @@ -840,8 +822,6 @@ int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, goto err_fid_vni_set; } - nve->config = config; - err = ops->fdb_replay(params->dev, params->vni, extack); if (err) goto err_fdb_replay; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index 34f7c3501b08..9650562fc0ef 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -922,6 +922,8 @@ static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config, case HWTSTAMP_TX_ONESTEP_SYNC: case HWTSTAMP_TX_ONESTEP_P2P: return -ERANGE; + default: + return -EINVAL; } switch (rx_filter) { @@ -952,6 +954,8 @@ static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config, case HWTSTAMP_FILTER_SOME: case HWTSTAMP_FILTER_NTP_ALL: return -ERANGE; + default: + return -EINVAL; } *p_ing_types = ing_types; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 02526c53d4f5..670a43fe2a00 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -20,14 +20,17 @@ enum mlxsw_sp_qdisc_type { MLXSW_SP_QDISC_PRIO, MLXSW_SP_QDISC_ETS, MLXSW_SP_QDISC_TBF, + MLXSW_SP_QDISC_FIFO, }; +struct mlxsw_sp_qdisc; + struct mlxsw_sp_qdisc_ops { enum mlxsw_sp_qdisc_type type; int (*check_params)(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); - int (*replace)(struct mlxsw_sp_port *mlxsw_sp_port, + int (*replace)(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); int (*destroy)(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc); 
@@ -64,6 +67,25 @@ struct mlxsw_sp_qdisc { struct mlxsw_sp_qdisc_ops *ops; }; +struct mlxsw_sp_qdisc_state { + struct mlxsw_sp_qdisc root_qdisc; + struct mlxsw_sp_qdisc tclass_qdiscs[IEEE_8021QAZ_MAX_TCS]; + + /* When a PRIO or ETS are added, the invisible FIFOs in their bands are + * created first. When notifications for these FIFOs arrive, it is not + * known what qdisc their parent handle refers to. It could be a + * newly-created PRIO that will replace the currently-offloaded one, or + * it could be e.g. a RED that will be attached below it. + * + * As the notifications start to arrive, use them to note what the + * future parent handle is, and keep track of which child FIFOs were + * seen. Then when the parent is known, retroactively offload those + * FIFOs. + */ + u32 future_handle; + bool future_fifos[IEEE_8021QAZ_MAX_TCS]; +}; + static bool mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle, enum mlxsw_sp_qdisc_type type) @@ -77,36 +99,38 @@ static struct mlxsw_sp_qdisc * mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent, bool root_only) { + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; int tclass, child_index; if (parent == TC_H_ROOT) - return mlxsw_sp_port->root_qdisc; + return &qdisc_state->root_qdisc; - if (root_only || !mlxsw_sp_port->root_qdisc || - !mlxsw_sp_port->root_qdisc->ops || - TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle || + if (root_only || !qdisc_state || + !qdisc_state->root_qdisc.ops || + TC_H_MAJ(parent) != qdisc_state->root_qdisc.handle || TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS) return NULL; child_index = TC_H_MIN(parent); tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index); - return &mlxsw_sp_port->tclass_qdiscs[tclass]; + return &qdisc_state->tclass_qdiscs[tclass]; } static struct mlxsw_sp_qdisc * mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle) { + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; int i; - if 
(mlxsw_sp_port->root_qdisc->handle == handle) - return mlxsw_sp_port->root_qdisc; + if (qdisc_state->root_qdisc.handle == handle) + return &qdisc_state->root_qdisc; - if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC) + if (qdisc_state->root_qdisc.handle == TC_H_UNSPEC) return NULL; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle) - return &mlxsw_sp_port->tclass_qdiscs[i]; + if (qdisc_state->tclass_qdiscs[i].handle == handle) + return &qdisc_state->tclass_qdiscs[i]; return NULL; } @@ -147,11 +171,15 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, if (err) goto err_bad_param; - err = ops->replace(mlxsw_sp_port, mlxsw_sp_qdisc, params); + err = ops->replace(mlxsw_sp_port, handle, mlxsw_sp_qdisc, params); if (err) goto err_config; - if (mlxsw_sp_qdisc->handle != handle) { + /* Check if the Qdisc changed. That includes a situation where an + * invisible Qdisc replaces another one, or is being added for the + * first time. 
+ */ + if (mlxsw_sp_qdisc->handle != handle || handle == TC_H_UNSPEC) { mlxsw_sp_qdisc->ops = ops; if (ops->clean_stats) ops->clean_stats(mlxsw_sp_port, mlxsw_sp_qdisc); @@ -295,7 +323,7 @@ mlxsw_sp_qdisc_get_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, int tclass_num, u32 min, u32 max, - u32 probability, bool is_ecn) + u32 probability, bool is_wred, bool is_ecn) { char cwtpm_cmd[MLXSW_REG_CWTPM_LEN]; char cwtp_cmd[MLXSW_REG_CWTP_LEN]; @@ -313,7 +341,7 @@ mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, return err; mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num, - MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn); + MLXSW_REG_CWTP_DEFAULT_PROFILE, is_wred, is_ecn); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd); } @@ -347,7 +375,6 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_qdisc->prio_bitmap, &stats_base->tx_packets, &stats_base->tx_bytes); - red_base->prob_mark = xstats->ecn; red_base->prob_drop = xstats->wred_drop[tclass_num]; red_base->pdrop = mlxsw_sp_xstats_tail_drop(xstats, tclass_num); @@ -361,7 +388,8 @@ static int mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc; + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc; if (root_qdisc != mlxsw_sp_qdisc) root_qdisc->stats_base.backlog -= @@ -400,7 +428,7 @@ mlxsw_sp_qdisc_red_check_params(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, +mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { @@ -417,8 +445,9 @@ mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, prob = 
DIV_ROUND_UP(prob, 1 << 16); min = mlxsw_sp_bytes_cells(mlxsw_sp, p->min); max = mlxsw_sp_bytes_cells(mlxsw_sp, p->max); - return mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num, min, - max, prob, p->is_ecn); + return mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num, + min, max, prob, + !p->is_nodrop, p->is_ecn); } static void @@ -453,22 +482,19 @@ mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, u8 tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_port_xstats *xstats; struct red_stats *res = xstats_ptr; - int early_drops, marks, pdrops; + int early_drops, pdrops; xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; early_drops = xstats->wred_drop[tclass_num] - xstats_base->prob_drop; - marks = xstats->ecn - xstats_base->prob_mark; pdrops = mlxsw_sp_xstats_tail_drop(xstats, tclass_num) - xstats_base->pdrop; res->pdrop += pdrops; res->prob_drop += early_drops; - res->prob_mark += marks; xstats_base->pdrop += pdrops; xstats_base->prob_drop += early_drops; - xstats_base->prob_mark += marks; return 0; } @@ -486,8 +512,7 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, stats_base = &mlxsw_sp_qdisc->stats_base; mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, stats_ptr); - overlimits = xstats->wred_drop[tclass_num] + xstats->ecn - - stats_base->overlimits; + overlimits = xstats->wred_drop[tclass_num] - stats_base->overlimits; stats_ptr->qstats->overlimits += overlimits; stats_base->overlimits += overlimits; @@ -564,7 +589,8 @@ static int mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc; + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc; if (root_qdisc != mlxsw_sp_qdisc) root_qdisc->stats_base.backlog -= @@ -651,7 +677,7 @@ mlxsw_sp_qdisc_tbf_check_params(struct mlxsw_sp_port *mlxsw_sp_port, } static 
int -mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, +mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { @@ -740,8 +766,121 @@ int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, } static int +mlxsw_sp_qdisc_fifo_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc; + + if (root_qdisc != mlxsw_sp_qdisc) + root_qdisc->stats_base.backlog -= + mlxsw_sp_qdisc->stats_base.backlog; + return 0; +} + +static int +mlxsw_sp_qdisc_fifo_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + return 0; +} + +static int +mlxsw_sp_qdisc_fifo_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + return 0; +} + +static int +mlxsw_sp_qdisc_get_fifo_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + stats_ptr); + return 0; +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_fifo = { + .type = MLXSW_SP_QDISC_FIFO, + .check_params = mlxsw_sp_qdisc_fifo_check_params, + .replace = mlxsw_sp_qdisc_fifo_replace, + .destroy = mlxsw_sp_qdisc_fifo_destroy, + .get_stats = mlxsw_sp_qdisc_get_fifo_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats, +}; + +int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_fifo_qopt_offload *p) +{ + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + int tclass, child_index; + u32 parent_handle; + + /* Invisible FIFOs are tracked in future_handle and future_fifos. Make + * sure that not more than one qdisc is created for a port at a time. 
+ * RTNL is a simple proxy for that. + */ + ASSERT_RTNL(); + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false); + if (!mlxsw_sp_qdisc && p->handle == TC_H_UNSPEC) { + parent_handle = TC_H_MAJ(p->parent); + if (parent_handle != qdisc_state->future_handle) { + /* This notifications is for a different Qdisc than + * previously. Wipe the future cache. + */ + memset(qdisc_state->future_fifos, 0, + sizeof(qdisc_state->future_fifos)); + qdisc_state->future_handle = parent_handle; + } + + child_index = TC_H_MIN(p->parent); + tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index); + if (tclass < IEEE_8021QAZ_MAX_TCS) { + if (p->command == TC_FIFO_REPLACE) + qdisc_state->future_fifos[tclass] = true; + else if (p->command == TC_FIFO_DESTROY) + qdisc_state->future_fifos[tclass] = false; + } + } + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_FIFO_REPLACE) { + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_fifo, NULL); + } + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, + MLXSW_SP_QDISC_FIFO)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_FIFO_DESTROY: + if (p->handle == mlxsw_sp_qdisc->handle) + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, + mlxsw_sp_qdisc); + return 0; + case TC_FIFO_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + case TC_FIFO_REPLACE: /* Handled above. 
*/ + break; + } + + return -EOPNOTSUPP; +} + +static int __mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port) { + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; int i; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { @@ -751,8 +890,8 @@ __mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port) MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, false, 0); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, - &mlxsw_sp_port->tclass_qdiscs[i]); - mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0; + &qdisc_state->tclass_qdiscs[i]); + qdisc_state->tclass_qdiscs[i].prio_bitmap = 0; } return 0; @@ -785,12 +924,13 @@ mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, } static int -__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, +__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, unsigned int nbands, const unsigned int *quanta, const unsigned int *weights, const u8 *priomap) { + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; struct mlxsw_sp_qdisc *child_qdisc; int tclass, i, band, backlog; u8 old_priomap; @@ -798,7 +938,7 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, for (band = 0; band < nbands; band++) { tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); - child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; + child_qdisc = &qdisc_state->tclass_qdiscs[tclass]; old_priomap = child_qdisc->prio_bitmap; child_qdisc->prio_bitmap = 0; @@ -827,28 +967,41 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, child_qdisc); child_qdisc->stats_base.backlog = backlog; } + + if (handle == qdisc_state->future_handle && + qdisc_state->future_fifos[tclass]) { + err = mlxsw_sp_qdisc_replace(mlxsw_sp_port, TC_H_UNSPEC, + child_qdisc, + &mlxsw_sp_qdisc_ops_fifo, + NULL); + if (err) + return err; + } } for (; band < IEEE_8021QAZ_MAX_TCS; band++) { tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); - child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; + child_qdisc = 
&qdisc_state->tclass_qdiscs[tclass]; child_qdisc->prio_bitmap = 0; mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc); mlxsw_sp_port_ets_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_SUBGROUP, tclass, 0, false, 0); } + + qdisc_state->future_handle = TC_H_UNSPEC; + memset(qdisc_state->future_fifos, 0, sizeof(qdisc_state->future_fifos)); return 0; } static int -mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, +mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_prio_qopt_offload_params *p = params; unsigned int zeroes[TCQ_ETS_MAX_BANDS] = {0}; - return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands, + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, handle, p->bands, zeroes, zeroes, p->priomap); } @@ -880,6 +1033,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) { + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; struct mlxsw_sp_qdisc *tc_qdisc; u64 tx_packets = 0; u64 tx_bytes = 0; @@ -888,7 +1042,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, int i; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - tc_qdisc = &mlxsw_sp_port->tclass_qdiscs[i]; + tc_qdisc = &qdisc_state->tclass_qdiscs[i]; mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, tc_qdisc, &tx_bytes, &tx_packets, &drops, &backlog); @@ -946,13 +1100,13 @@ mlxsw_sp_qdisc_ets_check_params(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, +mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_ets_qopt_offload_replace_params *p = params; - return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands, + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, handle, p->bands, p->quanta, p->weights, p->priomap); } @@ -1014,11 +1168,12 
@@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u8 band, u32 child_handle) { + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); struct mlxsw_sp_qdisc *old_qdisc; if (band < IEEE_8021QAZ_MAX_TCS && - mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == child_handle) + qdisc_state->tclass_qdiscs[tclass_num].handle == child_handle) return 0; if (!child_handle) { @@ -1037,7 +1192,7 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, - &mlxsw_sp_port->tclass_qdiscs[tclass_num]); + &qdisc_state->tclass_qdiscs[tclass_num]); return -EOPNOTSUPP; } @@ -1119,37 +1274,23 @@ int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + struct mlxsw_sp_qdisc_state *qdisc_state; int i; - mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL); - if (!mlxsw_sp_qdisc) - goto err_root_qdisc_init; - - mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc; - mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff; - mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; + qdisc_state = kzalloc(sizeof(*qdisc_state), GFP_KERNEL); + if (!qdisc_state) + return -ENOMEM; - mlxsw_sp_qdisc = kcalloc(IEEE_8021QAZ_MAX_TCS, - sizeof(*mlxsw_sp_qdisc), - GFP_KERNEL); - if (!mlxsw_sp_qdisc) - goto err_tclass_qdiscs_init; - - mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc; + qdisc_state->root_qdisc.prio_bitmap = 0xff; + qdisc_state->root_qdisc.tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i; + qdisc_state->tclass_qdiscs[i].tclass_num = i; + mlxsw_sp_port->qdisc = qdisc_state; return 0; - -err_tclass_qdiscs_init: - kfree(mlxsw_sp_port->root_qdisc); -err_root_qdisc_init: - return 
-ENOMEM; } void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port) { - kfree(mlxsw_sp_port->tclass_qdiscs); - kfree(mlxsw_sp_port->root_qdisc); + kfree(mlxsw_sp_port->qdisc); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4a77b511ead2..d5bca1be3ef5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -17,6 +17,7 @@ #include <linux/refcount.h> #include <linux/jhash.h> #include <linux/net_namespace.h> +#include <linux/mutex.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> @@ -48,39 +49,6 @@ struct mlxsw_sp_vr; struct mlxsw_sp_lpm_tree; struct mlxsw_sp_rif_ops; -struct mlxsw_sp_router { - struct mlxsw_sp *mlxsw_sp; - struct mlxsw_sp_rif **rifs; - struct mlxsw_sp_vr *vrs; - struct rhashtable neigh_ht; - struct rhashtable nexthop_group_ht; - struct rhashtable nexthop_ht; - struct list_head nexthop_list; - struct { - /* One tree for each protocol: IPv4 and IPv6 */ - struct mlxsw_sp_lpm_tree *proto_trees[2]; - struct mlxsw_sp_lpm_tree *trees; - unsigned int tree_count; - } lpm; - struct { - struct delayed_work dw; - unsigned long interval; /* ms */ - } neighs_update; - struct delayed_work nexthop_probe_dw; -#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ - struct list_head nexthop_neighs_list; - struct list_head ipip_list; - bool aborted; - struct notifier_block fib_nb; - struct notifier_block netevent_nb; - struct notifier_block inetaddr_nb; - struct notifier_block inet6addr_nb; - const struct mlxsw_sp_rif_ops **rif_ops_arr; - const struct mlxsw_sp_ipip_ops **ipip_ops_arr; - u32 adj_discard_index; - bool adj_discard_index_valid; -}; - struct mlxsw_sp_rif { struct list_head nexthop_list; struct list_head neigh_list; @@ -145,6 +113,9 @@ struct mlxsw_sp_rif_ops { void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac); }; +static struct mlxsw_sp_rif * 
+mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree); static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, @@ -760,13 +731,18 @@ int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, u16 *vr_id) { struct mlxsw_sp_vr *vr; + int err = 0; + mutex_lock(&mlxsw_sp->router->lock); vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); - if (!vr) - return -ESRCH; + if (!vr) { + err = -ESRCH; + goto out; + } *vr_id = vr->id; - - return 0; +out: + mutex_unlock(&mlxsw_sp->router->lock); + return err; } static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, @@ -988,17 +964,23 @@ __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev) struct ip_tunnel *tun = netdev_priv(ol_dev); struct net *net = dev_net(ol_dev); - return __dev_get_by_index(net, tun->parms.link); + return dev_get_by_index_rcu(net, tun->parms.link); } u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) { - struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + struct net_device *d; + u32 tb_id; + rcu_read_lock(); + d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); if (d) - return l3mdev_fib_table(d) ? : RT_TABLE_MAIN; + tb_id = l3mdev_fib_table(d) ? 
: RT_TABLE_MAIN; else - return RT_TABLE_MAIN; + tb_id = RT_TABLE_MAIN; + rcu_read_unlock(); + + return tb_id; } static struct mlxsw_sp_rif * @@ -1230,7 +1212,7 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, saddr_len = 4; saddr_prefix_len = 32; break; - case MLXSW_SP_L3_PROTO_IPV6: + default: WARN_ON(1); return NULL; } @@ -1355,8 +1337,12 @@ mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp, ipip_list_node); list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list, ipip_list_node) { - struct net_device *ipip_ul_dev = - __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); + struct net_device *ol_dev = ipip_entry->ol_dev; + struct net_device *ipip_ul_dev; + + rcu_read_lock(); + ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + rcu_read_unlock(); if (ipip_ul_dev == ul_dev) return ipip_entry; @@ -1365,10 +1351,16 @@ mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp, return NULL; } -bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp, +bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) { - return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL); + bool is_ipip_ul; + + mutex_lock(&mlxsw_sp->router->lock); + is_ipip_ul = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL); + mutex_unlock(&mlxsw_sp->router->lock); + + return is_ipip_ul; } static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp, @@ -1388,9 +1380,9 @@ static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ol_dev) { + enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX; struct mlxsw_sp_ipip_entry *ipip_entry; enum mlxsw_sp_l3proto ul_proto; - enum mlxsw_sp_ipip_type ipipt; union mlxsw_sp_l3addr saddr; u32 ul_tb_id; @@ -1543,13 +1535,17 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif); /** - * Update 
the offload related to an IPIP entry. This always updates decap, and - * in addition to that it also: - * @recreate_loopback: recreates the associated loopback RIF - * @keep_encap: updates next hops that use the tunnel netdevice. This is only + * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry. + * @mlxsw_sp: mlxsw_sp. + * @ipip_entry: IPIP entry. + * @recreate_loopback: Recreates the associated loopback RIF. + * @keep_encap: Updates next hops that use the tunnel netdevice. This is only * relevant when recreate_loopback is true. - * @update_nexthops: updates next hops, keeping the current loopback RIF. This + * @update_nexthops: Updates next hops, keeping the current loopback RIF. This * is only relevant when recreate_loopback is false. + * @extack: extack. + * + * Return: Non-zero value on failure. */ int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, @@ -1722,9 +1718,12 @@ static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp, list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list, ipip_list_node) { - struct net_device *ipip_ul_dev = - __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); + struct net_device *ol_dev = ipip_entry->ol_dev; + struct net_device *ipip_ul_dev; + rcu_read_lock(); + ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + rcu_read_unlock(); if (ipip_ul_dev == ul_dev) mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); } @@ -1737,35 +1736,41 @@ int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, { struct netdev_notifier_changeupper_info *chup; struct netlink_ext_ack *extack; + int err = 0; + mutex_lock(&mlxsw_sp->router->lock); switch (event) { case NETDEV_REGISTER: - return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev); + err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev); + break; case NETDEV_UNREGISTER: mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev); - return 0; + 
break; case NETDEV_UP: mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev); - return 0; + break; case NETDEV_DOWN: mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev); - return 0; + break; case NETDEV_CHANGEUPPER: chup = container_of(info, typeof(*chup), info); extack = info->extack; if (netif_is_l3_master(chup->upper_dev)) - return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp, - ol_dev, - extack); - return 0; + err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp, + ol_dev, + extack); + break; case NETDEV_CHANGE: extack = info->extack; - return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp, - ol_dev, extack); + err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp, + ol_dev, extack); + break; case NETDEV_CHANGEMTU: - return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev); + err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev); + break; } - return 0; + mutex_unlock(&mlxsw_sp->router->lock); + return err; } static int @@ -1809,8 +1814,9 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, struct netdev_notifier_info *info) { struct mlxsw_sp_ipip_entry *ipip_entry = NULL; - int err; + int err = 0; + mutex_lock(&mlxsw_sp->router->lock); while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, ul_dev, ipip_entry))) { @@ -1823,7 +1829,7 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, if (err) { mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp, ul_dev); - return err; + break; } if (demote_this) { @@ -1840,8 +1846,9 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, ipip_entry = prev; } } + mutex_unlock(&mlxsw_sp->router->lock); - return 0; + return err; } int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, @@ -1850,8 +1857,22 @@ int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, u32 tunnel_index) { enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + struct mlxsw_sp_router *router = mlxsw_sp->router; struct mlxsw_sp_fib_entry 
*fib_entry; - int err; + int err = 0; + + mutex_lock(&mlxsw_sp->router->lock); + + if (WARN_ON_ONCE(router->nve_decap_config.valid)) { + err = -EINVAL; + goto out; + } + + router->nve_decap_config.ul_tb_id = ul_tb_id; + router->nve_decap_config.tunnel_index = tunnel_index; + router->nve_decap_config.ul_proto = ul_proto; + router->nve_decap_config.ul_sip = *ul_sip; + router->nve_decap_config.valid = true; /* It is valid to create a tunnel with a local IP and only later * assign this IP address to a local interface @@ -1860,7 +1881,7 @@ int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, ul_proto, ul_sip, type); if (!fib_entry) - return 0; + goto out; fib_entry->decap.tunnel_index = tunnel_index; fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; @@ -1869,11 +1890,13 @@ int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, if (err) goto err_fib_entry_update; - return 0; + goto out; err_fib_entry_update: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +out: + mutex_unlock(&mlxsw_sp->router->lock); return err; } @@ -1882,16 +1905,40 @@ void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, const union mlxsw_sp_l3addr *ul_sip) { enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + struct mlxsw_sp_router *router = mlxsw_sp->router; struct mlxsw_sp_fib_entry *fib_entry; + mutex_lock(&mlxsw_sp->router->lock); + + if (WARN_ON_ONCE(!router->nve_decap_config.valid)) + goto out; + + router->nve_decap_config.valid = false; + fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id, ul_proto, ul_sip, type); if (!fib_entry) - return; + goto out; fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +out: + mutex_unlock(&mlxsw_sp->router->lock); +} + +static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp, + u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union 
mlxsw_sp_l3addr *ul_sip) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + + return router->nve_decap_config.valid && + router->nve_decap_config.ul_tb_id == ul_tb_id && + router->nve_decap_config.ul_proto == ul_proto && + !memcmp(&router->nve_decap_config.ul_sip, ul_sip, + sizeof(*ul_sip)); } struct mlxsw_sp_neigh_key { @@ -2264,10 +2311,8 @@ __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, int i, num_rec; int err; - /* Make sure the neighbour's netdev isn't removed in the - * process. - */ - rtnl_lock(); + /* Ensure the RIF we read from the device does not change mid-dump. */ + mutex_lock(&mlxsw_sp->router->lock); do { mlxsw_reg_rauhtd_pack(rauhtd_pl, type); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), @@ -2281,7 +2326,7 @@ __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl, i); } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return err; } @@ -2312,15 +2357,14 @@ static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_neigh_entry *neigh_entry; - /* Take RTNL mutex here to prevent lists from changes */ - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list, nexthop_neighs_list_node) /* If this neigh have nexthops, make the kernel think this neigh * is active regardless of the traffic. */ neigh_event_send(neigh_entry->key.n, NULL); - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); } static void @@ -2360,15 +2404,13 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) * the nexthop wouldn't get offloaded until the neighbor is resolved * but it wouldn't get resolved ever in case traffic is flowing in HW * using different nexthop. - * - * Take RTNL mutex here to prevent lists from changes. 
*/ - rtnl_lock(); + mutex_lock(&router->lock); list_for_each_entry(neigh_entry, &router->nexthop_neighs_list, nexthop_neighs_list_node) if (!neigh_entry->connected) neigh_event_send(neigh_entry->key.n, NULL); - rtnl_unlock(); + mutex_unlock(&router->lock); mlxsw_core_schedule_dw(&router->nexthop_probe_dw, MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL); @@ -2506,7 +2548,7 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) dead = n->dead; read_unlock_bh(&n->lock); - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); entry_connected = nud_state & NUD_VALID && !dead; @@ -2528,7 +2570,7 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); out: - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); neigh_release(n); kfree(net_work); } @@ -3189,7 +3231,6 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index = nh_grp->adj_index; /* base */ struct mlxsw_sp_nexthop *nh; int i; - int err; for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; @@ -3200,6 +3241,8 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, } if (nh->update || reallocate) { + int err = 0; + switch (nh->type) { case MLXSW_SP_NEXTHOP_TYPE_ETH: err = mlxsw_sp_nexthop_update @@ -3711,9 +3754,15 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev) { - struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + struct net_device *ul_dev; + bool is_up; + + rcu_read_lock(); + ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true; + rcu_read_unlock(); - return ul_dev ? 
(ul_dev->flags & IFF_UP) : true; + return is_up; } static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, @@ -3840,10 +3889,14 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, if (!dev) return 0; - in_dev = __in_dev_get_rtnl(dev); + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) + fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) { + rcu_read_unlock(); return 0; + } + rcu_read_unlock(); err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh); if (err) @@ -4473,6 +4526,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, { struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; + struct mlxsw_sp_router *router = mlxsw_sp->router; u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id); struct mlxsw_sp_ipip_entry *ipip_entry; struct fib_info *fi = fen_info->fi; @@ -4487,12 +4541,13 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, fib_entry, ipip_entry); } - if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id, - dip.addr4)) { - u32 t_index; + if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id, + MLXSW_SP_L3_PROTO_IPV4, + &dip)) { + u32 tunnel_index; - t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp); - fib_entry->decap.tunnel_index = t_index; + tunnel_index = router->nve_decap_config.tunnel_index; + fib_entry->decap.tunnel_index = tunnel_index; fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; return 0; } @@ -5923,8 +5978,7 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; int err; - /* Protect internal structures from changes */ - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); switch (fib_work->event) { @@ -5946,7 +6000,7 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); 
break; } - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); kfree(fib_work); } @@ -5957,7 +6011,7 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; int err; - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); switch (fib_work->event) { @@ -5984,7 +6038,7 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); break; } - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); kfree(fib_work); } @@ -5997,6 +6051,7 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) int err; rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_ADD: @@ -6025,6 +6080,7 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) dev_put(fib_work->ven_info.dev); break; } + mutex_unlock(&mlxsw_sp->router->lock); rtnl_unlock(); kfree(fib_work); } @@ -6233,7 +6289,7 @@ err_fib_event: return NOTIFY_BAD; } -struct mlxsw_sp_rif * +static struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) { @@ -6247,6 +6303,41 @@ mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, return NULL; } +bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + struct mlxsw_sp_rif *rif; + + mutex_lock(&mlxsw_sp->router->lock); + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + mutex_unlock(&mlxsw_sp->router->lock); + + return rif; +} + +u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) +{ + struct mlxsw_sp_rif *rif; + u16 vid = 0; + + mutex_lock(&mlxsw_sp->router->lock); + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + goto out; + + /* We only return the VID for VLAN RIFs. Otherwise we return an + * invalid value (0). 
+ */ + if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN) + goto out; + + vid = mlxsw_sp_fid_8021q_vid(rif->fid); + +out: + mutex_unlock(&mlxsw_sp->router->lock); + return vid; +} + static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) { char ritr_pl[MLXSW_REG_RITR_LEN]; @@ -6281,7 +6372,8 @@ mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, case NETDEV_UP: return rif == NULL; case NETDEV_DOWN: - idev = __in_dev_get_rtnl(dev); + rcu_read_lock(); + idev = __in_dev_get_rcu(dev); if (idev && idev->ifa_list) addr_list_empty = false; @@ -6289,6 +6381,7 @@ mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, if (addr_list_empty && inet6_dev && !list_empty(&inet6_dev->addr_list)) addr_list_empty = false; + rcu_read_unlock(); /* macvlans do not have a RIF, but rather piggy back on the * RIF of their lower device. @@ -6411,11 +6504,6 @@ const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) return rif->dev; } -struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif) -{ - return rif->fid; -} - static struct mlxsw_sp_rif * mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params, @@ -6528,10 +6616,13 @@ void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_rif *rif; + mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!rif) - return; + goto out; mlxsw_sp_rif_destroy(rif); +out: + mutex_unlock(&mlxsw_sp->router->lock); } static void @@ -6631,8 +6722,8 @@ err_fid_port_vid_map: return err; } -void -mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +static void +__mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; @@ -6650,6 +6741,16 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan 
*mlxsw_sp_port_vlan) mlxsw_sp_rif_subport_put(rif); } +void +mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp; + + mutex_lock(&mlxsw_sp->router->lock); + __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); + mutex_unlock(&mlxsw_sp->router->lock); +} + static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, struct net_device *port_dev, unsigned long event, u16 vid, @@ -6667,7 +6768,7 @@ static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev, extack); case NETDEV_DOWN: - mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); + __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); break; } @@ -6848,8 +6949,8 @@ err_rif_vrrp_add: return err; } -void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, - const struct net_device *macvlan_dev) +static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, + const struct net_device *macvlan_dev) { struct macvlan_dev *vlan = netdev_priv(macvlan_dev); struct mlxsw_sp_rif *rif; @@ -6866,6 +6967,14 @@ void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fid_index(rif->fid), false); } +void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, + const struct net_device *macvlan_dev) +{ + mutex_lock(&mlxsw_sp->router->lock); + __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev); + mutex_unlock(&mlxsw_sp->router->lock); +} + static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp, struct net_device *macvlan_dev, unsigned long event, @@ -6875,7 +6984,7 @@ static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp, case NETDEV_UP: return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack); case NETDEV_DOWN: - mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev); + __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev); break; } @@ -6945,15 +7054,17 @@ static int mlxsw_sp_inetaddr_event(struct 
notifier_block *nb, /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */ if (event == NETDEV_UP) - goto out; + return NOTIFY_DONE; router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb); + mutex_lock(&router->lock); rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev); if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL); out: + mutex_unlock(&router->lock); return notifier_from_errno(err); } @@ -6968,8 +7079,9 @@ int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, mlxsw_sp = mlxsw_sp_lower_get(dev); if (!mlxsw_sp) - goto out; + return NOTIFY_DONE; + mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; @@ -6981,6 +7093,7 @@ int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack); out: + mutex_unlock(&mlxsw_sp->router->lock); return notifier_from_errno(err); } @@ -7001,6 +7114,7 @@ static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) struct mlxsw_sp_rif *rif; rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!mlxsw_sp_rif_should_config(rif, dev, event)) @@ -7008,6 +7122,7 @@ static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL); out: + mutex_unlock(&mlxsw_sp->router->lock); rtnl_unlock(); dev_put(dev); kfree(inet6addr_work); @@ -7052,8 +7167,9 @@ int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, mlxsw_sp = mlxsw_sp_lower_get(dev); if (!mlxsw_sp) - goto out; + return NOTIFY_DONE; + mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; @@ -7065,6 +7181,7 @@ int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, err = 
__mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack); out: + mutex_unlock(&mlxsw_sp->router->lock); return notifier_from_errno(err); } @@ -7151,24 +7268,30 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, { struct mlxsw_sp *mlxsw_sp; struct mlxsw_sp_rif *rif; + int err = 0; mlxsw_sp = mlxsw_sp_lower_get(dev); if (!mlxsw_sp) return 0; + mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!rif) - return 0; + goto out; switch (event) { case NETDEV_CHANGEMTU: /* fall through */ case NETDEV_CHANGEADDR: - return mlxsw_sp_router_port_change_event(mlxsw_sp, rif); + err = mlxsw_sp_router_port_change_event(mlxsw_sp, rif); + break; case NETDEV_PRE_CHANGEADDR: - return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr); + err = mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr); + break; } - return 0; +out: + mutex_unlock(&mlxsw_sp->router->lock); + return err; } static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp, @@ -7211,9 +7334,10 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, if (!mlxsw_sp || netif_is_macvlan(l3_dev)) return 0; + mutex_lock(&mlxsw_sp->router->lock); switch (event) { case NETDEV_PRECHANGEUPPER: - return 0; + break; case NETDEV_CHANGEUPPER: if (info->linking) { struct netlink_ext_ack *extack; @@ -7225,6 +7349,7 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, } break; } + mutex_unlock(&mlxsw_sp->router->lock); return err; } @@ -7351,13 +7476,14 @@ u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) return mlxsw_core_max_ports(mlxsw_sp->core) + 1; } -static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif) +static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif) { struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; - u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); + u16 fid_index = mlxsw_sp_fid_index(rif->fid); int err; - err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true); + err = 
mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, + true); if (err) return err; @@ -7386,13 +7512,13 @@ err_fid_bc_flood_set: mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, mlxsw_sp_router_port(mlxsw_sp), false); err_fid_mc_flood_set: - mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); + mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); return err; } -static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif) +static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) { - u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); + u16 fid_index = mlxsw_sp_fid_index(rif->fid); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_fid *fid = rif->fid; @@ -7404,10 +7530,41 @@ static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif) mlxsw_sp_router_port(mlxsw_sp), false); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, mlxsw_sp_router_port(mlxsw_sp), false); - mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); + mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); } static struct mlxsw_sp_fid * +mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex); +} + +static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) +{ + struct switchdev_notifier_fdb_info info; + struct net_device *dev; + + dev = br_fdb_find_port(rif->dev, mac, 0); + if (!dev) + return; + + info.addr = mac; + info.vid = 0; + call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info, + NULL); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { + .type = MLXSW_SP_RIF_TYPE_FID, + .rif_size = sizeof(struct mlxsw_sp_rif), + .configure = mlxsw_sp_rif_fid_configure, + .deconfigure = mlxsw_sp_rif_fid_deconfigure, + .fid_get = mlxsw_sp_rif_fid_fid_get, + .fdb_del = mlxsw_sp_rif_fid_fdb_del, +}; + +static struct mlxsw_sp_fid * 
mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { @@ -7428,7 +7585,7 @@ mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, } } - return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, br_dev, vid, extack); + return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid); } static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) @@ -7449,103 +7606,6 @@ static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) NULL); } -static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = { - .type = MLXSW_SP_RIF_TYPE_VLAN, - .rif_size = sizeof(struct mlxsw_sp_rif), - .configure = mlxsw_sp_rif_vlan_configure, - .deconfigure = mlxsw_sp_rif_vlan_deconfigure, - .fid_get = mlxsw_sp_rif_vlan_fid_get, - .fdb_del = mlxsw_sp_rif_vlan_fdb_del, -}; - -static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif) -{ - struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; - u16 fid_index = mlxsw_sp_fid_index(rif->fid); - int err; - - err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, - true); - if (err) - return err; - - err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, - mlxsw_sp_router_port(mlxsw_sp), true); - if (err) - goto err_fid_mc_flood_set; - - err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, - mlxsw_sp_router_port(mlxsw_sp), true); - if (err) - goto err_fid_bc_flood_set; - - err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, - mlxsw_sp_fid_index(rif->fid), true); - if (err) - goto err_rif_fdb_op; - - mlxsw_sp_fid_rif_set(rif->fid, rif); - return 0; - -err_rif_fdb_op: - mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, - mlxsw_sp_router_port(mlxsw_sp), false); -err_fid_bc_flood_set: - mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, - mlxsw_sp_router_port(mlxsw_sp), false); -err_fid_mc_flood_set: - mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); - return err; -} - -static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) 
-{ - u16 fid_index = mlxsw_sp_fid_index(rif->fid); - struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; - struct mlxsw_sp_fid *fid = rif->fid; - - mlxsw_sp_fid_rif_set(fid, NULL); - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, - mlxsw_sp_fid_index(fid), false); - mlxsw_sp_rif_macvlan_flush(rif); - mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, - mlxsw_sp_router_port(mlxsw_sp), false); - mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, - mlxsw_sp_router_port(mlxsw_sp), false); - mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); -} - -static struct mlxsw_sp_fid * -mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif, - struct netlink_ext_ack *extack) -{ - return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, rif->dev, 0, extack); -} - -static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) -{ - struct switchdev_notifier_fdb_info info; - struct net_device *dev; - - dev = br_fdb_find_port(rif->dev, mac, 0); - if (!dev) - return; - - info.addr = mac; - info.vid = 0; - call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info, - NULL); -} - -static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { - .type = MLXSW_SP_RIF_TYPE_FID, - .rif_size = sizeof(struct mlxsw_sp_rif), - .configure = mlxsw_sp_rif_fid_configure, - .deconfigure = mlxsw_sp_rif_fid_deconfigure, - .fid_get = mlxsw_sp_rif_fid_fid_get, - .fdb_del = mlxsw_sp_rif_fid_fdb_del, -}; - static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = { .type = MLXSW_SP_RIF_TYPE_VLAN, .rif_size = sizeof(struct mlxsw_sp_rif), @@ -7733,28 +7793,32 @@ int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, u16 *ul_rif_index) { struct mlxsw_sp_rif *ul_rif; + int err = 0; - ASSERT_RTNL(); - + mutex_lock(&mlxsw_sp->router->lock); ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); - if (IS_ERR(ul_rif)) - return PTR_ERR(ul_rif); + if (IS_ERR(ul_rif)) { + err = PTR_ERR(ul_rif); + goto out; + } *ul_rif_index = ul_rif->rif_index; - - 
return 0; +out: + mutex_unlock(&mlxsw_sp->router->lock); + return err; } void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index) { struct mlxsw_sp_rif *ul_rif; - ASSERT_RTNL(); - + mutex_lock(&mlxsw_sp->router->lock); ul_rif = mlxsw_sp->router->rifs[ul_rif_index]; if (WARN_ON(!ul_rif)) - return; + goto out; mlxsw_sp_ul_rif_put(ul_rif); +out: + mutex_unlock(&mlxsw_sp->router->lock); } static int @@ -8004,6 +8068,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL); if (!router) return -ENOMEM; + mutex_init(&router->lock); mlxsw_sp->router = router; router->mlxsw_sp = mlxsw_sp; @@ -8107,6 +8172,7 @@ err_router_init: err_register_inet6addr_notifier: unregister_inetaddr_notifier(&router->inetaddr_nb); err_register_inetaddr_notifier: + mutex_destroy(&mlxsw_sp->router->lock); kfree(mlxsw_sp->router); return err; } @@ -8127,5 +8193,6 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) __mlxsw_sp_router_fini(mlxsw_sp); unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb); + mutex_destroy(&mlxsw_sp->router->lock); kfree(mlxsw_sp->router); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index c9b94f435cdd..8418dc3ae967 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -7,6 +7,49 @@ #include "spectrum.h" #include "reg.h" +struct mlxsw_sp_router_nve_decap { + u32 ul_tb_id; + u32 tunnel_index; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr ul_sip; + u8 valid:1; +}; + +struct mlxsw_sp_router { + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif **rifs; + struct mlxsw_sp_vr *vrs; + struct rhashtable neigh_ht; + struct rhashtable nexthop_group_ht; + struct rhashtable nexthop_ht; + struct list_head nexthop_list; + struct { + /* One tree for each protocol: IPv4 and 
IPv6 */ + struct mlxsw_sp_lpm_tree *proto_trees[2]; + struct mlxsw_sp_lpm_tree *trees; + unsigned int tree_count; + } lpm; + struct { + struct delayed_work dw; + unsigned long interval; /* ms */ + } neighs_update; + struct delayed_work nexthop_probe_dw; +#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ + struct list_head nexthop_neighs_list; + struct list_head ipip_list; + bool aborted; + struct notifier_block fib_nb; + struct notifier_block netevent_nb; + struct notifier_block inetaddr_nb; + struct notifier_block inet6addr_nb; + const struct mlxsw_sp_rif_ops **rif_ops_arr; + const struct mlxsw_sp_ipip_ops **ipip_ops_arr; + u32 adj_discard_index; + bool adj_discard_index_valid; + struct mlxsw_sp_router_nve_decap nve_decap_config; + struct mutex lock; /* Protects shared router resources */ +}; + struct mlxsw_sp_rif_ipip_lb; struct mlxsw_sp_rif_ipip_lb_config { enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 0cdd7954a085..9fb2e9d93929 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -3,6 +3,8 @@ #include <linux/if_bridge.h> #include <linux/list.h> +#include <linux/rtnetlink.h> +#include <linux/workqueue.h> #include <net/arp.h> #include <net/gre.h> #include <net/lag.h> @@ -14,38 +16,43 @@ #include "spectrum_span.h" #include "spectrum_switchdev.h" +struct mlxsw_sp_span { + struct work_struct work; + struct mlxsw_sp *mlxsw_sp; + atomic_t active_entries_count; + int entries_count; + struct mlxsw_sp_span_entry entries[0]; +}; + +static void mlxsw_sp_span_respin_work(struct work_struct *work); + static u64 mlxsw_sp_span_occ_get(void *priv) { const struct mlxsw_sp *mlxsw_sp = priv; - u64 occ = 0; - int i; - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - if (mlxsw_sp->span.entries[i].ref_count) - occ++; - } - - return occ; + return 
atomic_read(&mlxsw_sp->span->active_entries_count); } int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); - int i; + struct mlxsw_sp_span *span; + int i, entries_count; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN)) return -EIO; - mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, - MAX_SPAN); - mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count, - sizeof(struct mlxsw_sp_span_entry), - GFP_KERNEL); - if (!mlxsw_sp->span.entries) + entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_SPAN); + span = kzalloc(struct_size(span, entries, entries_count), GFP_KERNEL); + if (!span) return -ENOMEM; + span->entries_count = entries_count; + atomic_set(&span->active_entries_count, 0); + span->mlxsw_sp = mlxsw_sp; + mlxsw_sp->span = span; - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; INIT_LIST_HEAD(&curr->bound_ports_list); curr->id = i; @@ -53,6 +60,7 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) devlink_resource_occ_get_register(devlink, MLXSW_SP_RESOURCE_SPAN, mlxsw_sp_span_occ_get, mlxsw_sp); + INIT_WORK(&span->work, mlxsw_sp_span_respin_work); return 0; } @@ -62,14 +70,15 @@ void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp) struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); int i; + cancel_work_sync(&mlxsw_sp->span->work); devlink_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_SPAN); - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; WARN_ON_ONCE(!list_empty(&curr->bound_ports_list)); } - kfree(mlxsw_sp->span.entries); + kfree(mlxsw_sp->span); } static int @@ -645,15 +654,16 @@ 
mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp, int i; /* find a free entry to use */ - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - if (!mlxsw_sp->span.entries[i].ref_count) { - span_entry = &mlxsw_sp->span.entries[i]; + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + if (!mlxsw_sp->span->entries[i].ref_count) { + span_entry = &mlxsw_sp->span->entries[i]; break; } } if (!span_entry) return NULL; + atomic_inc(&mlxsw_sp->span->active_entries_count); span_entry->ops = ops; span_entry->ref_count = 1; span_entry->to_dev = to_dev; @@ -662,9 +672,11 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp, return span_entry; } -static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp_span_entry *span_entry) +static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) { mlxsw_sp_span_entry_deconfigure(span_entry); + atomic_dec(&mlxsw_sp->span->active_entries_count); } struct mlxsw_sp_span_entry * @@ -673,8 +685,8 @@ mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp, { int i; - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; if (curr->ref_count && curr->to_dev == to_dev) return curr; @@ -694,8 +706,8 @@ mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id) { int i; - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; if (curr->ref_count && curr->id == span_id) return curr; @@ -726,7 +738,7 @@ static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, { WARN_ON(!span_entry->ref_count); if (--span_entry->ref_count == 0) - mlxsw_sp_span_entry_destroy(span_entry); + mlxsw_sp_span_entry_destroy(mlxsw_sp, 
span_entry); return 0; } @@ -736,8 +748,8 @@ static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port) struct mlxsw_sp_span_inspected_port *p; int i; - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; list_for_each_entry(p, &curr->bound_ports_list, list) if (p->local_port == port->local_port && @@ -842,9 +854,9 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port, * so if a binding is requested, check for conflicts. */ if (bind) - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { struct mlxsw_sp_span_entry *curr = - &mlxsw_sp->span.entries[i]; + &mlxsw_sp->span->entries[i]; if (mlxsw_sp_span_entry_bound_port_find(curr, type, port, bind)) @@ -988,14 +1000,18 @@ void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id, mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind); } -void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_span_respin_work(struct work_struct *work) { - int i; - int err; + struct mlxsw_sp_span *span; + struct mlxsw_sp *mlxsw_sp; + int i, err; - ASSERT_RTNL(); - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + span = container_of(work, struct mlxsw_sp_span, work); + mlxsw_sp = span->mlxsw_sp; + + rtnl_lock(); + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; struct mlxsw_sp_span_parms sparms = {NULL}; if (!curr->ref_count) @@ -1010,4 +1026,12 @@ void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_span_entry_configure(mlxsw_sp, curr, sparms); } } + rtnl_unlock(); +} + +void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp) +{ + if (atomic_read(&mlxsw_sp->span->active_entries_count) == 0) + 
return; + mlxsw_core_schedule_work(&mlxsw_sp->span->work); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index a3af171c6358..a26162b08b7d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -153,16 +153,64 @@ static void mlxsw_sp_bridge_device_rifs_destroy(struct mlxsw_sp *mlxsw_sp, mlxsw_sp); } +static int mlxsw_sp_bridge_device_vxlan_init(struct mlxsw_sp_bridge *bridge, + struct net_device *br_dev, + struct netlink_ext_ack *extack) +{ + struct net_device *dev, *stop_dev; + struct list_head *iter; + int err; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev) && netif_running(dev)) { + err = mlxsw_sp_bridge_vxlan_join(bridge->mlxsw_sp, + br_dev, dev, 0, + extack); + if (err) { + stop_dev = dev; + goto err_vxlan_join; + } + } + } + + return 0; + +err_vxlan_join: + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev) && netif_running(dev)) { + if (stop_dev == dev) + break; + mlxsw_sp_bridge_vxlan_leave(bridge->mlxsw_sp, dev); + } + } + return err; +} + +static void mlxsw_sp_bridge_device_vxlan_fini(struct mlxsw_sp_bridge *bridge, + struct net_device *br_dev) +{ + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev) && netif_running(dev)) + mlxsw_sp_bridge_vxlan_leave(bridge->mlxsw_sp, dev); + } +} + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, - struct net_device *br_dev) + struct net_device *br_dev, + struct netlink_ext_ack *extack) { struct device *dev = bridge->mlxsw_sp->bus_info->dev; struct mlxsw_sp_bridge_device *bridge_device; bool vlan_enabled = br_vlan_enabled(br_dev); + int err; if (vlan_enabled && bridge->vlan_enabled_exists) { dev_err(dev, "Only one VLAN-aware bridge is supported\n"); + NL_SET_ERR_MSG_MOD(extack, 
"Only one VLAN-aware bridge is supported"); return ERR_PTR(-EINVAL); } @@ -184,13 +232,29 @@ mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, INIT_LIST_HEAD(&bridge_device->mids_list); list_add(&bridge_device->list, &bridge->bridges_list); + /* It is possible we already have VXLAN devices enslaved to the bridge. + * In which case, we need to replay their configuration as if they were + * just now enslaved to the bridge. + */ + err = mlxsw_sp_bridge_device_vxlan_init(bridge, br_dev, extack); + if (err) + goto err_vxlan_init; + return bridge_device; + +err_vxlan_init: + list_del(&bridge_device->list); + if (bridge_device->vlan_enabled) + bridge->vlan_enabled_exists = false; + kfree(bridge_device); + return ERR_PTR(err); } static void mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, struct mlxsw_sp_bridge_device *bridge_device) { + mlxsw_sp_bridge_device_vxlan_fini(bridge, bridge_device->dev); mlxsw_sp_bridge_device_rifs_destroy(bridge->mlxsw_sp, bridge_device->dev); list_del(&bridge_device->list); @@ -203,7 +267,8 @@ mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_get(struct mlxsw_sp_bridge *bridge, - struct net_device *br_dev) + struct net_device *br_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp_bridge_device *bridge_device; @@ -211,7 +276,7 @@ mlxsw_sp_bridge_device_get(struct mlxsw_sp_bridge *bridge, if (bridge_device) return bridge_device; - return mlxsw_sp_bridge_device_create(bridge, br_dev); + return mlxsw_sp_bridge_device_create(bridge, br_dev, extack); } static void @@ -292,7 +357,8 @@ mlxsw_sp_bridge_port_destroy(struct mlxsw_sp_bridge_port *bridge_port) static struct mlxsw_sp_bridge_port * mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge, - struct net_device *brport_dev) + struct net_device *brport_dev, + struct netlink_ext_ack *extack) { struct net_device *br_dev = netdev_master_upper_dev_get(brport_dev); struct mlxsw_sp_bridge_device 
*bridge_device; @@ -305,7 +371,7 @@ mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge, return bridge_port; } - bridge_device = mlxsw_sp_bridge_device_get(bridge, br_dev); + bridge_device = mlxsw_sp_bridge_device_get(bridge, br_dev, extack); if (IS_ERR(bridge_device)) return ERR_CAST(bridge_device); @@ -1000,7 +1066,7 @@ mlxsw_sp_port_vlan_bridge_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, &bridge_vlan->port_vlan_list); mlxsw_sp_bridge_port_get(mlxsw_sp_port->mlxsw_sp->bridge, - bridge_port->dev); + bridge_port->dev, extack); mlxsw_sp_port_vlan->bridge_port = bridge_port; return 0; @@ -1107,16 +1173,12 @@ mlxsw_sp_br_ban_rif_pvid_change(struct mlxsw_sp *mlxsw_sp, const struct net_device *br_dev, const struct switchdev_obj_port_vlan *vlan) { - struct mlxsw_sp_rif *rif; - struct mlxsw_sp_fid *fid; u16 pvid; u16 vid; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev); - if (!rif) + pvid = mlxsw_sp_rif_vid(mlxsw_sp, br_dev); + if (!pvid) return 0; - fid = mlxsw_sp_rif_fid(rif); - pvid = mlxsw_sp_fid_8021q_vid(fid); for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { if (vlan->flags & BRIDGE_VLAN_INFO_PVID) { @@ -1712,36 +1774,6 @@ mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port, } } -struct mlxsw_sp_span_respin_work { - struct work_struct work; - struct mlxsw_sp *mlxsw_sp; -}; - -static void mlxsw_sp_span_respin_work(struct work_struct *work) -{ - struct mlxsw_sp_span_respin_work *respin_work = - container_of(work, struct mlxsw_sp_span_respin_work, work); - - rtnl_lock(); - mlxsw_sp_span_respin(respin_work->mlxsw_sp); - rtnl_unlock(); - kfree(respin_work); -} - -static void mlxsw_sp_span_respin_schedule(struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_sp_span_respin_work *respin_work; - - respin_work = kzalloc(sizeof(*respin_work), GFP_ATOMIC); - if (!respin_work) - return; - - INIT_WORK(&respin_work->work, mlxsw_sp_span_respin_work); - respin_work->mlxsw_sp = mlxsw_sp; - - mlxsw_core_schedule_work(&respin_work->work); -} - static 
int mlxsw_sp_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans, @@ -1763,7 +1795,7 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, * call for later, so that the respin logic sees the * updated bridge state. */ - mlxsw_sp_span_respin_schedule(mlxsw_sp_port->mlxsw_sp); + mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); } break; case SWITCHDEV_OBJ_ID_PORT_MDB: @@ -1916,7 +1948,7 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, break; } - mlxsw_sp_span_respin_schedule(mlxsw_sp_port->mlxsw_sp); + mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); return err; } @@ -1990,12 +2022,11 @@ mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, return err; } - /* If no other port is member in the VLAN, then the FID does not exist. - * NVE will be enabled on the FID once a port joins the VLAN - */ - fid = mlxsw_sp_fid_8021q_lookup(mlxsw_sp, vid); - if (!fid) - return 0; + fid = mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); + if (IS_ERR(fid)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to create 802.1Q FID"); + return PTR_ERR(fid); + } if (mlxsw_sp_fid_vni_is_set(fid)) { NL_SET_ERR_MSG_MOD(extack, "VNI is already set on FID"); @@ -2007,11 +2038,6 @@ mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, if (err) goto err_nve_fid_enable; - /* The tunnel port does not hold a reference on the FID. 
Only - * local ports and the router port - */ - mlxsw_sp_fid_put(fid); - return 0; err_nve_fid_enable: @@ -2048,38 +2074,8 @@ mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device, u16 vid, struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); - struct net_device *vxlan_dev; - struct mlxsw_sp_fid *fid; - int err; - - fid = mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); - if (IS_ERR(fid)) - return fid; - - if (mlxsw_sp_fid_vni_is_set(fid)) - return fid; - - /* Find the VxLAN device that has the specified VLAN configured as - * PVID and egress untagged. There can be at most one such device - */ - vxlan_dev = mlxsw_sp_bridge_8021q_vxlan_dev_find(bridge_device->dev, - vid); - if (!vxlan_dev) - return fid; - - if (!netif_running(vxlan_dev)) - return fid; - - err = mlxsw_sp_bridge_8021q_vxlan_join(bridge_device, vxlan_dev, vid, - extack); - if (err) - goto err_vxlan_join; - - return fid; -err_vxlan_join: - mlxsw_sp_fid_put(fid); - return ERR_PTR(err); + return mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); } static struct mlxsw_sp_fid * @@ -2184,9 +2180,9 @@ mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_fid *fid; int err; - fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); - if (!fid) { - NL_SET_ERR_MSG_MOD(extack, "Did not find a corresponding FID"); + fid = mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); + if (IS_ERR(fid)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to create 802.1D FID"); return -EINVAL; } @@ -2200,11 +2196,6 @@ mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, if (err) goto err_nve_fid_enable; - /* The tunnel port does not hold a reference on the FID. 
Only - * local ports and the router port - */ - mlxsw_sp_fid_put(fid); - return 0; err_nve_fid_enable: @@ -2218,34 +2209,8 @@ mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device, u16 vid, struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); - struct net_device *vxlan_dev; - struct mlxsw_sp_fid *fid; - int err; - fid = mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); - if (IS_ERR(fid)) - return fid; - - if (mlxsw_sp_fid_vni_is_set(fid)) - return fid; - - vxlan_dev = mlxsw_sp_bridge_vxlan_dev_find(bridge_device->dev); - if (!vxlan_dev) - return fid; - - if (!netif_running(vxlan_dev)) - return fid; - - err = mlxsw_sp_bridge_8021d_vxlan_join(bridge_device, vxlan_dev, 0, - extack); - if (err) - goto err_vxlan_join; - - return fid; - -err_vxlan_join: - mlxsw_sp_fid_put(fid); - return ERR_PTR(err); + return mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); } static struct mlxsw_sp_fid * @@ -2287,7 +2252,8 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_port *bridge_port; int err; - bridge_port = mlxsw_sp_bridge_port_get(mlxsw_sp->bridge, brport_dev); + bridge_port = mlxsw_sp_bridge_port_get(mlxsw_sp->bridge, brport_dev, + extack); if (IS_ERR(bridge_port)) return PTR_ERR(bridge_port); bridge_device = bridge_port->bridge_device; @@ -2351,21 +2317,11 @@ void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, return; mlxsw_sp_nve_fid_disable(mlxsw_sp, fid); + /* Drop both the reference we just took during lookup and the reference + * the VXLAN device took. 
+ */ + mlxsw_sp_fid_put(fid); mlxsw_sp_fid_put(fid); -} - -struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, - const struct net_device *br_dev, - u16 vid, - struct netlink_ext_ack *extack) -{ - struct mlxsw_sp_bridge_device *bridge_device; - - bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); - if (WARN_ON(!bridge_device)) - return ERR_PTR(-EINVAL); - - return bridge_device->ops->fid_get(bridge_device, vid, extack); } static void @@ -2718,19 +2674,24 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, } } -static void mlxsw_sp_fdb_notify_work_schedule(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_fdb_notify_work_schedule(struct mlxsw_sp *mlxsw_sp, + bool no_delay) { struct mlxsw_sp_bridge *bridge = mlxsw_sp->bridge; + unsigned int interval = no_delay ? 0 : bridge->fdb_notify.interval; mlxsw_core_schedule_dw(&bridge->fdb_notify.dw, - msecs_to_jiffies(bridge->fdb_notify.interval)); + msecs_to_jiffies(interval)); } +#define MLXSW_SP_FDB_SFN_QUERIES_PER_SESSION 10 + static void mlxsw_sp_fdb_notify_work(struct work_struct *work) { struct mlxsw_sp_bridge *bridge; struct mlxsw_sp *mlxsw_sp; char *sfn_pl; + int queries; u8 num_rec; int i; int err; @@ -2743,20 +2704,26 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work) mlxsw_sp = bridge->mlxsw_sp; rtnl_lock(); - mlxsw_reg_sfn_pack(sfn_pl); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl); - if (err) { - dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n"); - goto out; + queries = MLXSW_SP_FDB_SFN_QUERIES_PER_SESSION; + while (queries > 0) { + mlxsw_reg_sfn_pack(sfn_pl); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl); + if (err) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n"); + goto out; + } + num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl); + for (i = 0; i < num_rec; i++) + mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i); + if (num_rec != 
MLXSW_REG_SFN_REC_MAX_COUNT) + goto out; + queries--; } - num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl); - for (i = 0; i < num_rec; i++) - mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i); out: rtnl_unlock(); kfree(sfn_pl); - mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); + mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp, !queries); } struct mlxsw_sp_switchdev_event_work { @@ -3502,7 +3469,7 @@ static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp) INIT_DELAYED_WORK(&bridge->fdb_notify.dw, mlxsw_sp_fdb_notify_work); bridge->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL; - mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); + mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp, false); return 0; err_register_switchdev_blocking_notifier: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 60205aa3f6a5..9096ffd89e50 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -1,13 +1,16 @@ // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 /* Copyright (c) 2019 Mellanox Technologies. 
All rights reserved */ +#include <linux/bitops.h> #include <linux/kernel.h> +#include <linux/netlink.h> #include <net/devlink.h> #include <uapi/linux/devlink.h> #include "core.h" #include "reg.h" #include "spectrum.h" +#include "spectrum_trap.h" /* All driver-specific traps must be documented in * Documentation/networking/devlink/mlxsw.rst @@ -25,36 +28,166 @@ enum { #define MLXSW_SP_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT +static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port_pcpu_stats *pcpu_stats; + + if (unlikely(!mlxsw_sp_port)) { + dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: skb received for non-existent port\n", + local_port); + kfree_skb(skb); + return -EINVAL; + } + + skb->dev = mlxsw_sp_port->dev; + + pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->rx_packets++; + pcpu_stats->rx_bytes += skb->len; + u64_stats_update_end(&pcpu_stats->syncp); + + skb->protocol = eth_type_trans(skb, skb->dev); + + return 0; +} + static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port, - void *priv); + void *trap_ctx) +{ + struct devlink_port *in_devlink_port; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + struct devlink *devlink; + int err; + + mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + + err = mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port); + if (err) + return; + + devlink = priv_to_devlink(mlxsw_sp->core); + in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, + local_port); + skb_push(skb, ETH_HLEN); + devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port, NULL); + consume_skb(skb); +} + +static void mlxsw_sp_rx_acl_drop_listener(struct sk_buff *skb, u8 local_port, + void *trap_ctx) +{ + u32 cookie_index = mlxsw_skb_cb(skb)->cookie_index; + const struct 
flow_action_cookie *fa_cookie; + struct devlink_port *in_devlink_port; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + struct devlink *devlink; + int err; + + mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + + err = mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port); + if (err) + return; + + devlink = priv_to_devlink(mlxsw_sp->core); + in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, + local_port); + skb_push(skb, ETH_HLEN); + rcu_read_lock(); + fa_cookie = mlxsw_sp_acl_act_cookie_lookup(mlxsw_sp, cookie_index); + devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port, fa_cookie); + rcu_read_unlock(); + consume_skb(skb); +} + static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port, - void *trap_ctx); + void *trap_ctx) +{ + struct devlink_port *in_devlink_port; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + struct devlink *devlink; + int err; + + mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + + err = mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port); + if (err) + return; + + devlink = priv_to_devlink(mlxsw_sp->core); + in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, + local_port); + skb_push(skb, ETH_HLEN); + devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port, NULL); + skb_pull(skb, ETH_HLEN); + skb->offload_fwd_mark = 1; + netif_receive_skb(skb); +} #define MLXSW_SP_TRAP_DROP(_id, _group_id) \ DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ - DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ MLXSW_SP_TRAP_METADATA) +#define MLXSW_SP_TRAP_DROP_EXT(_id, _group_id, _metadata) \ + DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + MLXSW_SP_TRAP_METADATA | (_metadata)) + #define MLXSW_SP_TRAP_DRIVER_DROP(_id, _group_id) \ DEVLINK_TRAP_DRIVER(DROP, DROP, 
DEVLINK_MLXSW_TRAP_ID_##_id, \ DEVLINK_MLXSW_TRAP_NAME_##_id, \ - DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ MLXSW_SP_TRAP_METADATA) #define MLXSW_SP_TRAP_EXCEPTION(_id, _group_id) \ DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id, \ - DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ MLXSW_SP_TRAP_METADATA) #define MLXSW_SP_RXL_DISCARD(_id, _group_id) \ - MLXSW_RXL(mlxsw_sp_rx_drop_listener, DISCARD_##_id, SET_FW_DEFAULT, \ - false, SP_##_group_id, DISCARD) + MLXSW_RXL_DIS(mlxsw_sp_rx_drop_listener, DISCARD_##_id, \ + TRAP_EXCEPTION_TO_CPU, false, SP_##_group_id, \ + SET_FW_DEFAULT, SP_##_group_id) + +#define MLXSW_SP_RXL_ACL_DISCARD(_id, _en_group_id, _dis_group_id) \ + MLXSW_RXL_DIS(mlxsw_sp_rx_acl_drop_listener, DISCARD_##_id, \ + TRAP_EXCEPTION_TO_CPU, false, SP_##_en_group_id, \ + SET_FW_DEFAULT, SP_##_dis_group_id) #define MLXSW_SP_RXL_EXCEPTION(_id, _group_id, _action) \ MLXSW_RXL(mlxsw_sp_rx_exception_listener, _id, \ - _action, false, SP_##_group_id, DISCARD) + _action, false, SP_##_group_id, SET_FW_DEFAULT) + +#define MLXSW_SP_TRAP_POLICER(_id, _rate, _burst) \ + DEVLINK_TRAP_POLICER(_id, _rate, _burst, \ + MLXSW_REG_QPCR_HIGHEST_CIR, \ + MLXSW_REG_QPCR_LOWEST_CIR, \ + 1 << MLXSW_REG_QPCR_HIGHEST_CBS, \ + 1 << MLXSW_REG_QPCR_LOWEST_CBS) + +/* Ordered by policer identifier */ +static const struct devlink_trap_policer mlxsw_sp_trap_policers_arr[] = { + MLXSW_SP_TRAP_POLICER(1, 10 * 1024, 128), +}; -static struct devlink_trap mlxsw_sp_traps_arr[] = { +static const struct devlink_trap_group mlxsw_sp_trap_groups_arr[] = { + DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 1), + DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS, 1), + DEVLINK_TRAP_GROUP_GENERIC(TUNNEL_DROPS, 1), + DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS, 1), +}; + +static const struct devlink_trap mlxsw_sp_traps_arr[] = { MLXSW_SP_TRAP_DROP(SMAC_MC, L2_DROPS), MLXSW_SP_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS), 
MLXSW_SP_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), @@ -83,9 +216,13 @@ static struct devlink_trap mlxsw_sp_traps_arr[] = { MLXSW_SP_TRAP_DROP(NON_ROUTABLE, L3_DROPS), MLXSW_SP_TRAP_EXCEPTION(DECAP_ERROR, TUNNEL_DROPS), MLXSW_SP_TRAP_DROP(OVERLAY_SMAC_MC, TUNNEL_DROPS), + MLXSW_SP_TRAP_DROP_EXT(INGRESS_FLOW_ACTION_DROP, ACL_DROPS, + DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE), + MLXSW_SP_TRAP_DROP_EXT(EGRESS_FLOW_ACTION_DROP, ACL_DROPS, + DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE), }; -static struct mlxsw_listener mlxsw_sp_listeners_arr[] = { +static const struct mlxsw_listener mlxsw_sp_listeners_arr[] = { MLXSW_SP_RXL_DISCARD(ING_PACKET_SMAC_MC, L2_DISCARDS), MLXSW_SP_RXL_DISCARD(ING_SWITCH_VTAG_ALLOW, L2_DISCARDS), MLXSW_SP_RXL_DISCARD(ING_SWITCH_VLAN, L2_DISCARDS), @@ -103,34 +240,37 @@ static struct mlxsw_listener mlxsw_sp_listeners_arr[] = { MLXSW_SP_RXL_DISCARD(ING_ROUTER_IPV4_SIP_BC, L3_DISCARDS), MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_RESERVED_SCOPE, L3_DISCARDS), MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, L3_DISCARDS), - MLXSW_SP_RXL_EXCEPTION(MTUERROR, ROUTER_EXP, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(TTLERROR, ROUTER_EXP, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(RPF, RPF, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(RTR_INGRESS1, REMOTE_ROUTE, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV4, HOST_MISS, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, HOST_MISS, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, REMOTE_ROUTE, + MLXSW_SP_RXL_EXCEPTION(MTUERROR, L3_DISCARDS, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(TTLERROR, L3_DISCARDS, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(RPF, L3_DISCARDS, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(RTR_INGRESS1, L3_DISCARDS, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV4, L3_DISCARDS, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, L3_DISCARDS, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, L3_DISCARDS, TRAP_EXCEPTION_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM4, ROUTER_EXP, + 
MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM4, L3_DISCARDS, TRAP_EXCEPTION_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6, ROUTER_EXP, + MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6, L3_DISCARDS, TRAP_EXCEPTION_TO_CPU), MLXSW_SP_RXL_DISCARD(ROUTER_IRIF_EN, L3_DISCARDS), MLXSW_SP_RXL_DISCARD(ROUTER_ERIF_EN, L3_DISCARDS), MLXSW_SP_RXL_DISCARD(NON_ROUTABLE, L3_DISCARDS), - MLXSW_SP_RXL_EXCEPTION(DECAP_ECN0, ROUTER_EXP, TRAP_EXCEPTION_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(IPIP_DECAP_ERROR, ROUTER_EXP, + MLXSW_SP_RXL_EXCEPTION(DECAP_ECN0, TUNNEL_DISCARDS, + TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(IPIP_DECAP_ERROR, TUNNEL_DISCARDS, TRAP_EXCEPTION_TO_CPU), MLXSW_SP_RXL_EXCEPTION(DISCARD_DEC_PKT, TUNNEL_DISCARDS, TRAP_EXCEPTION_TO_CPU), MLXSW_SP_RXL_DISCARD(OVERLAY_SMAC_MC, TUNNEL_DISCARDS), + MLXSW_SP_RXL_ACL_DISCARD(INGRESS_ACL, ACL_DISCARDS, DUMMY), + MLXSW_SP_RXL_ACL_DISCARD(EGRESS_ACL, ACL_DISCARDS, DUMMY), }; /* Mapping between hardware trap and devlink trap. Multiple hardware traps can * be mapped to the same devlink trap. Order is according to * 'mlxsw_sp_listeners_arr'. 
*/ -static u16 mlxsw_sp_listener_devlink_map[] = { +static const u16 mlxsw_sp_listener_devlink_map[] = { DEVLINK_TRAP_GENERIC_ID_SMAC_MC, DEVLINK_TRAP_GENERIC_ID_VLAN_TAG_MISMATCH, DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER, @@ -164,99 +304,168 @@ static u16 mlxsw_sp_listener_devlink_map[] = { DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, DEVLINK_TRAP_GENERIC_ID_OVERLAY_SMAC_MC, + DEVLINK_TRAP_GENERIC_ID_INGRESS_FLOW_ACTION_DROP, + DEVLINK_TRAP_GENERIC_ID_EGRESS_FLOW_ACTION_DROP, }; -static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, - u8 local_port, - struct mlxsw_sp_port *mlxsw_sp_port) +#define MLXSW_SP_THIN_POLICER_ID (MLXSW_REG_HTGT_TRAP_GROUP_MAX + 1) + +static struct mlxsw_sp_trap_policer_item * +mlxsw_sp_trap_policer_item_lookup(struct mlxsw_sp *mlxsw_sp, u32 id) { - struct mlxsw_sp_port_pcpu_stats *pcpu_stats; + struct mlxsw_sp_trap_policer_item *policer_item; + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; - if (unlikely(!mlxsw_sp_port)) { - dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: skb received for non-existent port\n", - local_port); - kfree_skb(skb); - return -EINVAL; + list_for_each_entry(policer_item, &trap->policer_item_list, list) { + if (policer_item->id == id) + return policer_item; } - skb->dev = mlxsw_sp_port->dev; + return NULL; +} - pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats); - u64_stats_update_begin(&pcpu_stats->syncp); - pcpu_stats->rx_packets++; - pcpu_stats->rx_bytes += skb->len; - u64_stats_update_end(&pcpu_stats->syncp); +static int mlxsw_sp_trap_cpu_policers_set(struct mlxsw_sp *mlxsw_sp) +{ + char qpcr_pl[MLXSW_REG_QPCR_LEN]; - skb->protocol = eth_type_trans(skb, skb->dev); + /* The purpose of "thin" policer is to drop as many packets + * as possible. The dummy group is using it. 
+ */ + __set_bit(MLXSW_SP_THIN_POLICER_ID, mlxsw_sp->trap->policers_usage); + mlxsw_reg_qpcr_pack(qpcr_pl, MLXSW_SP_THIN_POLICER_ID, + MLXSW_REG_QPCR_IR_UNITS_M, false, 1, 4); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl); +} - return 0; +static int mlxsw_sp_trap_dummy_group_init(struct mlxsw_sp *mlxsw_sp) +{ + char htgt_pl[MLXSW_REG_HTGT_LEN]; + + mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_SP_DUMMY, + MLXSW_SP_THIN_POLICER_ID, 0, 1); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl); } -static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port, - void *trap_ctx) +static int mlxsw_sp_trap_policers_init(struct mlxsw_sp *mlxsw_sp) { - struct devlink_port *in_devlink_port; - struct mlxsw_sp_port *mlxsw_sp_port; - struct mlxsw_sp *mlxsw_sp; - struct devlink *devlink; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + u64 free_policers = 0; + u32 last_id = 0; + int err, i; - mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); - mlxsw_sp_port = mlxsw_sp->ports[local_port]; + for_each_clear_bit(i, trap->policers_usage, trap->max_policers) + free_policers++; - if (mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port)) - return; + if (ARRAY_SIZE(mlxsw_sp_trap_policers_arr) > free_policers) { + dev_err(mlxsw_sp->bus_info->dev, "Exceeded number of supported packet trap policers\n"); + return -ENOBUFS; + } - devlink = priv_to_devlink(mlxsw_sp->core); - in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, - local_port); - skb_push(skb, ETH_HLEN); - devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port); - consume_skb(skb); -} + trap->policers_arr = kcalloc(free_policers, + sizeof(struct devlink_trap_policer), + GFP_KERNEL); + if (!trap->policers_arr) + return -ENOMEM; + + trap->policers_count = free_policers; + + for (i = 0; i < free_policers; i++) { + const struct devlink_trap_policer *policer; + + if (i < 
ARRAY_SIZE(mlxsw_sp_trap_policers_arr)) { + policer = &mlxsw_sp_trap_policers_arr[i]; + trap->policers_arr[i] = *policer; + last_id = policer->id; + } else { + /* Use parameters set for first policer and override + * relevant ones. + */ + policer = &mlxsw_sp_trap_policers_arr[0]; + trap->policers_arr[i] = *policer; + trap->policers_arr[i].id = ++last_id; + trap->policers_arr[i].init_rate = 1; + trap->policers_arr[i].init_burst = 16; + } + } -static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port, - void *trap_ctx) -{ - struct devlink_port *in_devlink_port; - struct mlxsw_sp_port *mlxsw_sp_port; - struct mlxsw_sp *mlxsw_sp; - struct devlink *devlink; + INIT_LIST_HEAD(&trap->policer_item_list); - mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); - mlxsw_sp_port = mlxsw_sp->ports[local_port]; + err = devlink_trap_policers_register(devlink, trap->policers_arr, + trap->policers_count); + if (err) + goto err_trap_policers_register; - if (mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port)) - return; + return 0; - devlink = priv_to_devlink(mlxsw_sp->core); - in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, - local_port); - skb_push(skb, ETH_HLEN); - devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port); - skb_pull(skb, ETH_HLEN); - skb->offload_fwd_mark = 1; - netif_receive_skb(skb); +err_trap_policers_register: + kfree(trap->policers_arr); + return err; +} + +static void mlxsw_sp_trap_policers_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + + devlink_trap_policers_unregister(devlink, trap->policers_arr, + trap->policers_count); + WARN_ON(!list_empty(&trap->policer_item_list)); + kfree(trap->policers_arr); } int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp) { + size_t groups_count = ARRAY_SIZE(mlxsw_sp_trap_groups_arr); struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + int err; + + err = 
mlxsw_sp_trap_cpu_policers_set(mlxsw_sp); + if (err) + return err; + + err = mlxsw_sp_trap_dummy_group_init(mlxsw_sp); + if (err) + return err; if (WARN_ON(ARRAY_SIZE(mlxsw_sp_listener_devlink_map) != ARRAY_SIZE(mlxsw_sp_listeners_arr))) return -EINVAL; - return devlink_traps_register(devlink, mlxsw_sp_traps_arr, - ARRAY_SIZE(mlxsw_sp_traps_arr), - mlxsw_sp); + err = mlxsw_sp_trap_policers_init(mlxsw_sp); + if (err) + return err; + + err = devlink_trap_groups_register(devlink, mlxsw_sp_trap_groups_arr, + groups_count); + if (err) + goto err_trap_groups_register; + + err = devlink_traps_register(devlink, mlxsw_sp_traps_arr, + ARRAY_SIZE(mlxsw_sp_traps_arr), mlxsw_sp); + if (err) + goto err_traps_register; + + return 0; + +err_traps_register: + devlink_trap_groups_unregister(devlink, mlxsw_sp_trap_groups_arr, + groups_count); +err_trap_groups_register: + mlxsw_sp_trap_policers_fini(mlxsw_sp); + return err; } void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp) { + size_t groups_count = ARRAY_SIZE(mlxsw_sp_trap_groups_arr); struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); devlink_traps_unregister(devlink, mlxsw_sp_traps_arr, ARRAY_SIZE(mlxsw_sp_traps_arr)); + devlink_trap_groups_unregister(devlink, mlxsw_sp_trap_groups_arr, + groups_count); + mlxsw_sp_trap_policers_fini(mlxsw_sp); } int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core, @@ -265,7 +474,7 @@ int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core, int i; for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) { - struct mlxsw_listener *listener; + const struct mlxsw_listener *listener; int err; if (mlxsw_sp_listener_devlink_map[i] != trap->id) @@ -286,7 +495,7 @@ void mlxsw_sp_trap_fini(struct mlxsw_core *mlxsw_core, int i; for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) { - struct mlxsw_listener *listener; + const struct mlxsw_listener *listener; if (mlxsw_sp_listener_devlink_map[i] != trap->id) continue; @@ -303,27 +512,24 @@ int mlxsw_sp_trap_action_set(struct 
mlxsw_core *mlxsw_core, int i; for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) { - enum mlxsw_reg_hpkt_action hw_action; - struct mlxsw_listener *listener; + const struct mlxsw_listener *listener; + bool enabled; int err; if (mlxsw_sp_listener_devlink_map[i] != trap->id) continue; listener = &mlxsw_sp_listeners_arr[i]; - switch (action) { case DEVLINK_TRAP_ACTION_DROP: - hw_action = MLXSW_REG_HPKT_ACTION_SET_FW_DEFAULT; + enabled = false; break; case DEVLINK_TRAP_ACTION_TRAP: - hw_action = MLXSW_REG_HPKT_ACTION_TRAP_EXCEPTION_TO_CPU; + enabled = true; break; default: return -EINVAL; } - - err = mlxsw_core_trap_action_set(mlxsw_core, listener, - hw_action); + err = mlxsw_core_trap_state_set(mlxsw_core, listener, enabled); if (err) return err; } @@ -331,62 +537,34 @@ int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core, return 0; } -#define MLXSW_SP_DISCARD_POLICER_ID (MLXSW_REG_HTGT_TRAP_GROUP_MAX + 1) - -static int -mlxsw_sp_trap_group_policer_init(struct mlxsw_sp *mlxsw_sp, - const struct devlink_trap_group *group) -{ - enum mlxsw_reg_qpcr_ir_units ir_units; - char qpcr_pl[MLXSW_REG_QPCR_LEN]; - u16 policer_id; - u8 burst_size; - bool is_bytes; - u32 rate; - - switch (group->id) { - case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS: /* fall through */ - case DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS: /* fall through */ - case DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS: - policer_id = MLXSW_SP_DISCARD_POLICER_ID; - ir_units = MLXSW_REG_QPCR_IR_UNITS_M; - is_bytes = false; - rate = 10 * 1024; /* 10Kpps */ - burst_size = 7; - break; - default: - return -EINVAL; - } - - mlxsw_reg_qpcr_pack(qpcr_pl, policer_id, ir_units, is_bytes, rate, - burst_size); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl); -} - static int -__mlxsw_sp_trap_group_init(struct mlxsw_sp *mlxsw_sp, - const struct devlink_trap_group *group) +__mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group, + u32 policer_id) { + struct 
mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + u16 hw_policer_id = MLXSW_REG_HTGT_INVALID_POLICER; char htgt_pl[MLXSW_REG_HTGT_LEN]; u8 priority, tc, group_id; - u16 policer_id; switch (group->id) { case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS: group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS; - policer_id = MLXSW_SP_DISCARD_POLICER_ID; priority = 0; tc = 1; break; case DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS: group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS; - policer_id = MLXSW_SP_DISCARD_POLICER_ID; priority = 0; tc = 1; break; case DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS: group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS; - policer_id = MLXSW_SP_DISCARD_POLICER_ID; + priority = 0; + tc = 1; + break; + case DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_DROPS: + group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS; priority = 0; tc = 1; break; @@ -394,23 +572,179 @@ __mlxsw_sp_trap_group_init(struct mlxsw_sp *mlxsw_sp, return -EINVAL; } - mlxsw_reg_htgt_pack(htgt_pl, group_id, policer_id, priority, tc); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl); + if (policer_id) { + struct mlxsw_sp_trap_policer_item *policer_item; + + policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, + policer_id); + if (WARN_ON(!policer_item)) + return -EINVAL; + hw_policer_id = policer_item->hw_id; + } + + mlxsw_reg_htgt_pack(htgt_pl, group_id, hw_policer_id, priority, tc); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, const struct devlink_trap_group *group) { - struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + return __mlxsw_sp_trap_group_init(mlxsw_core, group, + group->init_policer_id); +} + +int mlxsw_sp_trap_group_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group, + const struct devlink_trap_policer *policer) +{ + u32 policer_id = policer ? 
policer->id : 0; + + return __mlxsw_sp_trap_group_init(mlxsw_core, group, policer_id); +} + +static struct mlxsw_sp_trap_policer_item * +mlxsw_sp_trap_policer_item_init(struct mlxsw_sp *mlxsw_sp, u32 id) +{ + struct mlxsw_sp_trap_policer_item *policer_item; + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + u16 hw_id; + + /* We should be able to allocate a policer because the number of + * policers we registered with devlink is in accordance with the number + * of available policers. + */ + hw_id = find_first_zero_bit(trap->policers_usage, trap->max_policers); + if (WARN_ON(hw_id == trap->max_policers)) + return ERR_PTR(-ENOBUFS); + + policer_item = kzalloc(sizeof(*policer_item), GFP_KERNEL); + if (!policer_item) + return ERR_PTR(-ENOMEM); + + __set_bit(hw_id, trap->policers_usage); + policer_item->hw_id = hw_id; + policer_item->id = id; + list_add_tail(&policer_item->list, &trap->policer_item_list); + + return policer_item; +} + +static void +mlxsw_sp_trap_policer_item_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_trap_policer_item *policer_item) +{ + list_del(&policer_item->list); + __clear_bit(policer_item->hw_id, mlxsw_sp->trap->policers_usage); + kfree(policer_item); +} + +static int mlxsw_sp_trap_policer_bs(u64 burst, u8 *p_burst_size, + struct netlink_ext_ack *extack) +{ + int bs = fls64(burst) - 1; + + if (burst != (1 << bs)) { + NL_SET_ERR_MSG_MOD(extack, "Policer burst size is not power of two"); + return -EINVAL; + } + + *p_burst_size = bs; + + return 0; +} + +static int __mlxsw_sp_trap_policer_set(struct mlxsw_sp *mlxsw_sp, u16 hw_id, + u64 rate, u64 burst, bool clear_counter, + struct netlink_ext_ack *extack) +{ + char qpcr_pl[MLXSW_REG_QPCR_LEN]; + u8 burst_size; int err; - err = mlxsw_sp_trap_group_policer_init(mlxsw_sp, group); + err = mlxsw_sp_trap_policer_bs(burst, &burst_size, extack); if (err) return err; - err = __mlxsw_sp_trap_group_init(mlxsw_sp, group); + mlxsw_reg_qpcr_pack(qpcr_pl, hw_id, MLXSW_REG_QPCR_IR_UNITS_M, false, + rate, 
burst_size); + mlxsw_reg_qpcr_clear_counter_set(qpcr_pl, clear_counter); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl); +} + +int mlxsw_sp_trap_policer_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_trap_policer_item *policer_item; + int err; + + policer_item = mlxsw_sp_trap_policer_item_init(mlxsw_sp, policer->id); + if (IS_ERR(policer_item)) + return PTR_ERR(policer_item); + + err = __mlxsw_sp_trap_policer_set(mlxsw_sp, policer_item->hw_id, + policer->init_rate, + policer->init_burst, true, NULL); + if (err) + goto err_trap_policer_set; + + return 0; + +err_trap_policer_set: + mlxsw_sp_trap_policer_item_fini(mlxsw_sp, policer_item); + return err; +} + +void mlxsw_sp_trap_policer_fini(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_trap_policer_item *policer_item; + + policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, policer->id); + if (WARN_ON(!policer_item)) + return; + + mlxsw_sp_trap_policer_item_fini(mlxsw_sp, policer_item); +} + +int mlxsw_sp_trap_policer_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 rate, u64 burst, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_trap_policer_item *policer_item; + + policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, policer->id); + if (WARN_ON(!policer_item)) + return -EINVAL; + + return __mlxsw_sp_trap_policer_set(mlxsw_sp, policer_item->hw_id, + rate, burst, false, extack); +} + +int +mlxsw_sp_trap_policer_counter_get(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 *p_drops) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_trap_policer_item *policer_item; + char 
qpcr_pl[MLXSW_REG_QPCR_LEN]; + int err; + + policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, policer->id); + if (WARN_ON(!policer_item)) + return -EINVAL; + + mlxsw_reg_qpcr_pack(qpcr_pl, policer_item->hw_id, + MLXSW_REG_QPCR_IR_UNITS_M, false, 0, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl); if (err) return err; + *p_drops = mlxsw_reg_qpcr_violate_count_get(qpcr_pl); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h new file mode 100644 index 000000000000..8c54897ba173 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2020 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_TRAP_H +#define _MLXSW_SPECTRUM_TRAP_H + +#include <linux/list.h> +#include <net/devlink.h> + +struct mlxsw_sp_trap { + struct devlink_trap_policer *policers_arr; /* Registered policers */ + u64 policers_count; /* Number of registered policers */ + struct list_head policer_item_list; + u64 max_policers; + unsigned long policers_usage[]; /* Usage bitmap */ +}; + +struct mlxsw_sp_trap_policer_item { + u16 hw_id; + u32 id; + struct list_head list; /* Member of policer_item_list */ +}; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index f0e98ec8f1ee..90535820b559 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -180,7 +180,7 @@ static int mlxsw_sx_port_oper_status_get(struct mlxsw_sx_port *mlxsw_sx_port, if (err) return err; oper_status = mlxsw_reg_paos_oper_status_get(paos_pl); - *p_is_up = oper_status == MLXSW_PORT_ADMIN_STATUS_UP ? 
true : false; + *p_is_up = oper_status == MLXSW_PORT_ADMIN_STATUS_UP; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 12e1fa998d42..eaa521b7561b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -102,6 +102,8 @@ enum { MLXSW_TRAP_ID_ACL1 = 0x1C1, /* Multicast trap used for routes with trap-and-forward action */ MLXSW_TRAP_ID_ACL2 = 0x1C2, + MLXSW_TRAP_ID_DISCARD_INGRESS_ACL = 0x1C3, + MLXSW_TRAP_ID_DISCARD_EGRESS_ACL = 0x1C4, MLXSW_TRAP_ID_MAX = 0x1FF }; |