20 files changed, 610 insertions, 177 deletions
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst index 01deedb71597..db62187eebce 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst @@ -45,6 +45,26 @@ Following bridge VLAN functions are supported by mlx5: Subfunction =========== +Subfunctions which are spawned over the E-switch are created with only a devlink +device, and by default all of the SF auxiliary devices are disabled. +This allows the user to configure the SF before it has been fully probed, +which saves time. + +Usage example: +Create SF: +$ devlink port add pci/0000:08:00.0 flavour pcisf pfnum 0 sfnum 11 +$ devlink port function set pci/0000:08:00.0/32768 \ + hw_addr 00:00:00:00:00:11 state active + +Enable ETH auxiliary device: +$ devlink dev param set auxiliary/mlx5_core.sf.1 \ + name enable_eth value true cmode driverinit + +Now, in order to fully probe the SF, use devlink reload: +$ devlink dev reload auxiliary/mlx5_core.sf.1 + +mlx5 supports ETH, RDMA and VDPA (vnet) auxiliary device devlink params (see :ref:`Documentation/networking/devlink/devlink-params.rst`) + mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst <devlink_port>`) interface. A subfunction has its own function capabilities and its own resources. This diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index bb95b40d25eb..fc13b41cc9b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -246,6 +246,7 @@ void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev) debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages); debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.page_counters[MLX5_VF]); + debugfs_create_u32("fw_pages_ec_vfs", 0400, pages, &dev->priv.page_counters[MLX5_EC_VF]); debugfs_create_u32("fw_pages_sfs", 0400, pages, &dev->priv.page_counters[MLX5_SF]); debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.page_counters[MLX5_HOST_PF]); debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 1b33533b15de..617ac7e5d75c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -323,6 +323,18 @@ static void del_adev(struct auxiliary_device *adev) auxiliary_device_uninit(adev); } +void mlx5_dev_set_lightweight(struct mlx5_core_dev *dev) +{ + mutex_lock(&mlx5_intf_mutex); + dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; + mutex_unlock(&mlx5_intf_mutex); +} + +bool mlx5_dev_is_lightweight(struct mlx5_core_dev *dev) +{ + return dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; +} + int mlx5_attach_device(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; @@ -457,6 +469,10 @@ static int add_drivers(struct mlx5_core_dev *dev) if (priv->adev[i]) continue; + if (mlx5_adev_devices[i].is_enabled && + !(mlx5_adev_devices[i].is_enabled(dev))) + continue; + if (mlx5_adev_devices[i].is_supported) is_supported = mlx5_adev_devices[i].is_supported(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index
63635cc44479..3d82ec890666 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -7,7 +7,6 @@ #include "fw_reset.h" #include "fs_core.h" #include "eswitch.h" -#include "lag/lag.h" #include "esw/qos.h" #include "sf/dev/dev.h" #include "sf/sf.h" @@ -142,6 +141,13 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, bool sf_dev_allocated; int ret = 0; + if (mlx5_dev_is_lightweight(dev)) { + if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) + return -EOPNOTSUPP; + mlx5_unload_one_light(dev); + return 0; + } + sf_dev_allocated = mlx5_sf_dev_allocated(dev); if (sf_dev_allocated) { /* Reload results in deleting SF device which further results in @@ -194,6 +200,10 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a *actions_performed = BIT(action); switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + if (mlx5_dev_is_lightweight(dev)) { + mlx5_fw_reporters_create(dev); + return mlx5_init_one_devl_locked(dev); + } ret = mlx5_load_one_devl_locked(dev, false); break; case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: @@ -427,33 +437,6 @@ static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id return 0; } - -static int mlx5_devlink_esw_multiport_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - - if (!MLX5_ESWITCH_MANAGER(dev)) - return -EOPNOTSUPP; - - if (ctx->val.vbool) - return mlx5_lag_mpesw_enable(dev); - - mlx5_lag_mpesw_disable(dev); - return 0; -} - -static int mlx5_devlink_esw_multiport_get(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - - if (!MLX5_ESWITCH_MANAGER(dev)) - return -EOPNOTSUPP; - - ctx->val.vbool = mlx5_lag_is_mpesw(dev); - return 0; -} #endif static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, @@ -527,12 +510,6 @@ static const struct devlink_param mlx5_devlink_params[] = { BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_large_group_num_validate), - DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, - "esw_multiport", DEVLINK_PARAM_TYPE_BOOL, - BIT(DEVLINK_PARAM_CMODE_RUNTIME), - mlx5_devlink_esw_multiport_get, - mlx5_devlink_esw_multiport_set, - NULL), #endif DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_eq_depth_validate), @@ -545,7 +522,7 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) struct mlx5_core_dev *dev = devlink_priv(devlink); union devlink_param_value value; - value.vbool = MLX5_CAP_GEN(dev, roce); + value.vbool = MLX5_CAP_GEN(dev, roce) && !mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, value); @@ -595,7 +572,7 @@ static int mlx5_devlink_eth_params_register(struct devlink *devlink) if (err) return err; - value.vbool = true; + value.vbool = !mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, value); @@ -635,6 +612,7 @@ static const struct devlink_param mlx5_devlink_rdma_params[] = { static int mlx5_devlink_rdma_params_register(struct devlink *devlink) { + struct mlx5_core_dev *dev = devlink_priv(devlink); union devlink_param_value value; int err; @@ -646,7 +624,7 @@ static int mlx5_devlink_rdma_params_register(struct devlink *devlink) if (err) return err; - value.vbool = true; + value.vbool = 
!mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, value); @@ -681,7 +659,7 @@ static int mlx5_devlink_vnet_params_register(struct devlink *devlink) if (err) return err; - value.vbool = true; + value.vbool = !mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, value); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 0290e0dea539..4e923a2874ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -112,10 +112,8 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *po int err; handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) { - kfree(handle); + if (!handle) return ERR_PTR(-ENOMEM); - } post_attr->chain = 0; post_attr->prio = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index f370f67d9e33..af779c700278 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -18,7 +18,8 @@ static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_ { return vport_num == MLX5_VPORT_UPLINK || (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) || - mlx5_eswitch_is_vf_vport(esw, vport_num); + mlx5_eswitch_is_vf_vport(esw, vport_num) || + mlx5_core_is_ec_vf_vport(esw->dev, vport_num); } static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 vport_num) @@ -56,6 +57,11 @@ static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 dl_port->attrs.switch_id.id_len = ppid.id_len; devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, vport_num - 1, external); + } else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) { + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, + vport_num - 1, false); } return dl_port; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index ecd8864d5d11..2af9c4646bc7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -41,6 +41,7 @@ #include "esw/qos.h" #include "mlx5_core.h" #include "lib/eq.h" +#include "lag/lag.h" #include "eswitch.h" #include "fs_core.h" #include "devlink.h" @@ -1051,6 +1052,18 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) } } +static void mlx5_eswitch_clear_ec_vf_vports_info(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + memset(&vport->qos, 0, sizeof(vport->qos)); + memset(&vport->info, 0, sizeof(vport->info)); + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + } +} + /* Public E-Switch API */ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, enum mlx5_eswitch_vport_event enabled_events) @@ -1090,6 +1103,19 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) } } +static void mlx5_eswitch_unload_ec_vf_vports(struct mlx5_eswitch *esw, + u16 num_ec_vfs) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) 
{ + if (!vport->enabled) + continue; + mlx5_eswitch_unload_vport(esw, vport->vport); + } +} + int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, enum mlx5_eswitch_vport_event enabled_events) { @@ -1110,6 +1136,26 @@ vf_err: return err; } +static int mlx5_eswitch_load_ec_vf_vports(struct mlx5_eswitch *esw, u16 num_ec_vfs, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) { + err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events); + if (err) + goto vf_err; + } + + return 0; + +vf_err: + mlx5_eswitch_unload_ec_vf_vports(esw, num_ec_vfs); + return err; +} + static int host_pf_enable_hca(struct mlx5_core_dev *dev) { if (!mlx5_core_is_ecpf(dev)) @@ -1154,6 +1200,12 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs, + enabled_events); + if (ret) + goto ec_vf_err; + } } /* Enable VF vports */ @@ -1164,6 +1216,9 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, return 0; vf_err: + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs); +ec_vf_err: if (mlx5_ecpf_vport_exists(esw->dev)) mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); ecpf_err: @@ -1180,8 +1235,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); - if (mlx5_ecpf_vport_exists(esw->dev)) + if (mlx5_ecpf_vport_exists(esw->dev)) { + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs); mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); + } host_pf_disable_hca(esw->dev); mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); @@ -1225,6 +1283,9 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, host_params_context.host_num_of_vfs); + if (mlx5_core_ec_sriov_enabled(esw->dev)) + esw->esw_funcs.num_ec_vfs = num_vfs; + kvfree(out); } @@ -1332,9 +1393,9 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) mlx5_eswitch_event_handlers_register(esw); - esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n", + esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", - esw->esw_funcs.num_vfs, esw->enabled_vports); + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); mlx5_esw_mode_change_notify(esw, esw->mode); @@ -1356,7 +1417,7 @@ abort: int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { bool toggle_lag; - int ret; + int ret = 0; if (!mlx5_esw_allowed(esw)) return 0; @@ -1376,10 +1437,21 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) vport_events = (esw->mode == MLX5_ESWITCH_LEGACY) ? MLX5_LEGACY_SRIOV_VPORT_EVENTS : MLX5_VPORT_UC_ADDR_CHANGE; - ret = mlx5_eswitch_load_vf_vports(esw, num_vfs, vport_events); - if (!ret) - esw->esw_funcs.num_vfs = num_vfs; + /* If this is the ECPF the number of host VFs is managed via the + * eswitch function change event handler, and any num_vfs provided + * here are intended to be EC VFs. 
+ */ + if (!mlx5_core_is_ecpf(esw->dev)) { + ret = mlx5_eswitch_load_vf_vports(esw, num_vfs, vport_events); + if (!ret) + esw->esw_funcs.num_vfs = num_vfs; + } else if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, num_vfs, vport_events); + if (!ret) + esw->esw_funcs.num_ec_vfs = num_vfs; + } } + up_write(&esw->mode_lock); if (toggle_lag) @@ -1399,16 +1471,22 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) /* If driver is unloaded, this function is called twice by remove_one() * and mlx5_unload(). Prevent the second call. */ - if (!esw->esw_funcs.num_vfs && !clear_vf) + if (!esw->esw_funcs.num_vfs && !esw->esw_funcs.num_ec_vfs && !clear_vf) goto unlock; - esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), active vports(%d)\n", + esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", - esw->esw_funcs.num_vfs, esw->enabled_vports); - - mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); - if (clear_vf) - mlx5_eswitch_clear_vf_vports_info(esw); + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); + + if (!mlx5_core_is_ecpf(esw->dev)) { + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (clear_vf) + mlx5_eswitch_clear_vf_vports_info(esw); + } else if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs); + if (clear_vf) + mlx5_eswitch_clear_ec_vf_vports_info(esw); + } if (esw->mode == MLX5_ESWITCH_OFFLOADS) { struct devlink *devlink = priv_to_devlink(esw->dev); @@ -1419,7 +1497,10 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) if (esw->mode == MLX5_ESWITCH_LEGACY) mlx5_eswitch_disable_locked(esw); - esw->esw_funcs.num_vfs = 0; + if (!mlx5_core_is_ecpf(esw->dev)) + esw->esw_funcs.num_vfs = 0; + else + esw->esw_funcs.num_ec_vfs = 0; unlock: up_write(&esw->mode_lock); @@ -1439,9 +1520,9 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) mlx5_eswitch_event_handlers_unregister(esw); - esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", + esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? 
"LEGACY" : "OFFLOADS", - esw->esw_funcs.num_vfs, esw->enabled_vports); + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); if (esw->fdb_table.flags & MLX5_ESW_FDB_CREATED) { esw->fdb_table.flags &= ~MLX5_ESW_FDB_CREATED; @@ -1601,6 +1682,17 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) idx++; } + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + int ec_vf_base_num = mlx5_core_ec_vf_vport_base(dev); + + for (i = 0; i < mlx5_core_max_ec_vfs(esw->dev); i++) { + err = mlx5_esw_vport_alloc(esw, idx, ec_vf_base_num + i); + if (err) + goto err; + idx++; + } + } + if (mlx5_ecpf_vport_exists(dev) || mlx5_core_is_ecpf_esw_manager(dev)) { err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_ECPF); @@ -1618,6 +1710,38 @@ err: return err; } +static int mlx5_devlink_esw_multiport_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + if (ctx->val.vbool) + return mlx5_lag_mpesw_enable(dev); + + mlx5_lag_mpesw_disable(dev); + return 0; +} + +static int mlx5_devlink_esw_multiport_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + ctx->val.vbool = mlx5_lag_is_mpesw(dev); + return 0; +} + +static const struct devlink_param mlx5_eswitch_params[] = { + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, + "esw_multiport", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_esw_multiport_get, + mlx5_devlink_esw_multiport_set, NULL), +}; + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw; @@ -1626,9 +1750,16 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) if (!MLX5_VPORT_MANAGER(dev) && !MLX5_ESWITCH_MANAGER(dev)) return 0; + err = devl_params_register(priv_to_devlink(dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); + if (err) + return err; + esw = kzalloc(sizeof(*esw), GFP_KERNEL); - if (!esw) - return -ENOMEM; + if (!esw) { + err = -ENOMEM; + goto unregister_param; + } esw->dev = dev; esw->manager_vport = mlx5_eswitch_manager_vport(dev); @@ -1688,6 +1819,9 @@ abort: if (esw->work_queue) destroy_workqueue(esw->work_queue); kfree(esw); +unregister_param: + devl_params_unregister(priv_to_devlink(dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); return err; } @@ -1711,6 +1845,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw_offloads_cleanup(esw); mlx5_esw_vports_cleanup(esw); kfree(esw); + devl_params_unregister(priv_to_devlink(esw->dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); } /* Vport Administration */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index d3608f198e0a..266b60fefe25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -289,6 +289,7 @@ struct mlx5_host_work { struct mlx5_esw_functions { struct mlx5_nb nb; u16 num_vfs; + u16 num_ec_vfs; }; enum { @@ -654,6 +655,18 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); #define mlx5_esw_for_each_host_func_vport(esw, index, vport, last) \ mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_HOST_FN) +/* This macro should only be used if EC SRIOV is enabled. + * + * Because there were no more marks available on the xarray this uses a + * for_each_range approach. 
The range is only valid when EC SRIOV is enabled + */ +#define mlx5_esw_for_each_ec_vf_vport(esw, index, vport, last) \ + xa_for_each_range(&((esw)->vports), \ + index, \ + vport, \ + MLX5_CAP_GEN_2((esw->dev), ec_vf_vport_base), \ + (last) - 1) + struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink); struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index eafb098db6b0..fdf482f6fb34 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -55,13 +55,6 @@ #define mlx5_esw_for_each_rep(esw, i, rep) \ xa_for_each(&((esw)->offloads.vport_reps), i, rep) -#define mlx5_esw_for_each_sf_rep(esw, i, rep) \ - xa_for_each_marked(&((esw)->offloads.vport_reps), i, rep, MLX5_ESW_VPT_SF) - -#define mlx5_esw_for_each_vf_rep(esw, index, rep) \ - mlx5_esw_for_each_entry_marked(&((esw)->offloads.vport_reps), index, \ - rep, (esw)->esw_funcs.num_vfs, MLX5_ESW_VPT_VF) - /* There are two match-all miss flows, one for unicast dst mac and * one for multicast. */ @@ -1132,11 +1125,32 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows[vport->index] = flow; } + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + if (i >= mlx5_core_max_ec_vfs(peer_dev)) + break; + esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, + spec, vport->vport); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_ec_vf_flow_err; + } + flows[vport->index] = flow; + } + } esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)] = flows; kvfree(spec); return 0; +add_ec_vf_flow_err: + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + if (!flows[vport->index]) + continue; + mlx5_del_flow_rules(flows[vport->index]); + } add_vf_flow_err: mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { if (!flows[vport->index]) @@ -1169,6 +1183,17 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows = esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)]; + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + /* The flow for a particular vport could be NULL if the other ECPF + * has fewer or no VFs enabled + */ + if (!flows[vport->index]) + continue; + mlx5_del_flow_rules(flows[vport->index]); + } + } + mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) mlx5_del_flow_rules(flows[vport->index]); @@ -2191,18 +2216,6 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return 0; } -static void mlx5_esw_offloads_rep_mark_set(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep, - xa_mark_t mark) -{ - bool mark_set; - - /* Copy the mark from vport to its rep */ - mark_set = xa_get_mark(&esw->vports, rep->vport, mark); - if (mark_set) - xa_set_mark(&esw->offloads.vport_reps, rep->vport, mark); -} - static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport) { struct mlx5_eswitch_rep *rep; @@ -2222,9 +2235,6 @@ static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx if (err) goto insert_err; - mlx5_esw_offloads_rep_mark_set(esw, 
rep, MLX5_ESW_VPT_HOST_FN); - mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_VF); - mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_SF); return 0; insert_err: @@ -2365,37 +2375,13 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, esw->offloads.rep_ops[rep_type]->unload(rep); } -static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ - struct mlx5_eswitch_rep *rep; - unsigned long i; - - mlx5_esw_for_each_sf_rep(esw, i, rep) - __esw_offloads_unload_rep(esw, rep, rep_type); -} - static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; unsigned long i; - __unload_reps_sf_vport(esw, rep_type); - - mlx5_esw_for_each_vf_rep(esw, i, rep) - __esw_offloads_unload_rep(esw, rep, rep_type); - - if (mlx5_ecpf_vport_exists(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); - __esw_offloads_unload_rep(esw, rep, rep_type); - } - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + mlx5_esw_for_each_rep(esw, i, rep) __esw_offloads_unload_rep(esw, rep, rep_type); - } - - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - __esw_offloads_unload_rep(esw, rep, rep_type); } int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num) @@ -3333,6 +3319,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) /* Representor will control the vport link state */ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; /* Uplink vport rep must load first. */ err = esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK); @@ -3570,8 +3559,27 @@ static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode, goto revert_inline_mode; } } + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode); + if (err) { + err_vport_num = vport->vport; + NL_SET_ERR_MSG_MOD(extack, + "Failed to set min inline on vport"); + goto revert_ec_vf_inline_mode; + } + } + } return 0; +revert_ec_vf_inline_mode: + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + if (vport->vport == err_vport_num) + break; + mlx5_modify_nic_vport_min_inline(dev, + vport->vport, + esw->offloads.inline_mode); + } revert_inline_mode: mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) { if (vport->vport == err_vport_num) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 871c32dda66e..210100a4064a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -719,7 +719,7 @@ static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { #define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000 #define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD -static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) +void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; struct devlink *devlink = priv_to_devlink(dev); @@ -735,17 +735,17 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) } health->fw_reporter = - devlink_health_reporter_create(devlink, 
&mlx5_fw_reporter_ops, - 0, dev); + devl_health_reporter_create(devlink, &mlx5_fw_reporter_ops, + 0, dev); if (IS_ERR(health->fw_reporter)) mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", PTR_ERR(health->fw_reporter)); health->fw_fatal_reporter = - devlink_health_reporter_create(devlink, - &mlx5_fw_fatal_reporter_ops, - grace_period, - dev); + devl_health_reporter_create(devlink, + &mlx5_fw_fatal_reporter_ops, + grace_period, + dev); if (IS_ERR(health->fw_fatal_reporter)) mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", PTR_ERR(health->fw_fatal_reporter)); @@ -777,7 +777,8 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - queue_work(health->wq, &health->fatal_report_work); + if (!mlx5_dev_is_lightweight(dev)) + queue_work(health->wq, &health->fatal_report_work); } #define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60) @@ -905,10 +906,15 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) int mlx5_health_init(struct mlx5_core_dev *dev) { + struct devlink *devlink = priv_to_devlink(dev); struct mlx5_core_health *health; char *name; - mlx5_fw_reporters_create(dev); + if (!mlx5_dev_is_lightweight(dev)) { + devl_lock(devlink); + mlx5_fw_reporters_create(dev); + devl_unlock(devlink); + } mlx5_reporter_vnic_create(dev); health = &dev->priv.health; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d6ee016deae1..6fa314f8e5ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1118,7 +1118,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_devcom_unregister_device(dev->priv.devcom); } -static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout) +static int mlx5_function_enable(struct mlx5_core_dev *dev, bool boot, u64 timeout) { int err; @@ -1183,28 +1183,56 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout goto reclaim_boot_pages; } + return 0; + +reclaim_boot_pages: + mlx5_reclaim_startup_pages(dev); +err_disable_hca: + mlx5_core_disable_hca(dev, 0); +stop_health_poll: + mlx5_stop_health_poll(dev, boot); +err_cmd_cleanup: + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); + mlx5_cmd_cleanup(dev); + + return err; +} + +static void mlx5_function_disable(struct mlx5_core_dev *dev, bool boot) +{ + mlx5_reclaim_startup_pages(dev); + mlx5_core_disable_hca(dev, 0); + mlx5_stop_health_poll(dev, boot); + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); + mlx5_cmd_cleanup(dev); +} + +static int mlx5_function_open(struct mlx5_core_dev *dev) +{ + int err; + err = set_hca_ctrl(dev); if (err) { mlx5_core_err(dev, "set_hca_ctrl failed\n"); - goto reclaim_boot_pages; + return err; } err = set_hca_cap(dev); if (err) { mlx5_core_err(dev, "set_hca_cap failed\n"); - goto reclaim_boot_pages; + return err; } err = mlx5_satisfy_startup_pages(dev, 0); if (err) { mlx5_core_err(dev, "failed to allocate init pages\n"); - goto reclaim_boot_pages; + return err; } err = mlx5_cmd_init_hca(dev, sw_owner_id); if (err) { mlx5_core_err(dev, "init hca failed\n"); - goto reclaim_boot_pages; + return err; } mlx5_set_driver_version(dev); @@ -1212,26 +1240,13 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout err = mlx5_query_hca_caps(dev); if (err) { mlx5_core_err(dev, "query hca failed\n"); - goto reclaim_boot_pages; + return err; } mlx5_start_health_fw_log_up(dev); - return 0; - -reclaim_boot_pages: - 
mlx5_reclaim_startup_pages(dev); -err_disable_hca: - mlx5_core_disable_hca(dev, 0); -stop_health_poll: - mlx5_stop_health_poll(dev, boot); -err_cmd_cleanup: - mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); - mlx5_cmd_cleanup(dev); - - return err; } -static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) +static int mlx5_function_close(struct mlx5_core_dev *dev) { int err; @@ -1240,15 +1255,33 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n"); return err; } - mlx5_reclaim_startup_pages(dev); - mlx5_core_disable_hca(dev, 0); - mlx5_stop_health_poll(dev, boot); - mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); - mlx5_cmd_cleanup(dev); return 0; } +static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout) +{ + int err; + + err = mlx5_function_enable(dev, boot, timeout); + if (err) + return err; + + err = mlx5_function_open(dev); + if (err) + mlx5_function_disable(dev, boot); + return err; +} + +static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) +{ + int err = mlx5_function_close(dev); + + if (!err) + mlx5_function_disable(dev, boot); + return err; +} + static int mlx5_load(struct mlx5_core_dev *dev) { int err; @@ -1391,12 +1424,11 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_put_uars_page(dev, dev->priv.uar); } -int mlx5_init_one(struct mlx5_core_dev *dev) +int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev) { - struct devlink *devlink = priv_to_devlink(dev); + bool light_probe = mlx5_dev_is_lightweight(dev); int err = 0; - devl_lock(devlink); mutex_lock(&dev->intf_state_mutex); dev->state = MLX5_DEVICE_STATE_UP; @@ -1410,9 +1442,14 @@ int mlx5_init_one(struct mlx5_core_dev *dev) goto function_teardown; } - err = mlx5_devlink_params_register(priv_to_devlink(dev)); - if (err) - goto err_devlink_params_reg; + /* In case of light_probe, mlx5_devlink is already registered. + * Hence, don't register devlink again. + */ + if (!light_probe) { + err = mlx5_devlink_params_register(priv_to_devlink(dev)); + if (err) + goto err_devlink_params_reg; + } err = mlx5_load(dev); if (err) @@ -1425,14 +1462,14 @@ int mlx5_init_one(struct mlx5_core_dev *dev) goto err_register; mutex_unlock(&dev->intf_state_mutex); - devl_unlock(devlink); return 0; err_register: clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); mlx5_unload(dev); err_load: - mlx5_devlink_params_unregister(priv_to_devlink(dev)); + if (!light_probe) + mlx5_devlink_params_unregister(priv_to_devlink(dev)); err_devlink_params_reg: mlx5_cleanup_once(dev); function_teardown: @@ -1440,6 +1477,16 @@ function_teardown: err_function: dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mutex_unlock(&dev->intf_state_mutex); + return err; +} + +int mlx5_init_one(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + int err; + + devl_lock(devlink); + err = mlx5_init_one_devl_locked(dev); devl_unlock(devlink); return err; } @@ -1557,6 +1604,100 @@ void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend) devl_unlock(devlink); } +/* In case of light probe, we don't need a full query of hca_caps, but only the below caps. + * A full query of hca_caps will be done when the device is reloaded.
+ */ +static int mlx5_query_hca_caps_light(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); + if (err) + return err; + + if (MLX5_CAP_GEN(dev, eth_net_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, nic_flow_table) || + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE); + if (err) + return err; + } + + if (MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { + err = mlx5_core_get_caps(dev, MLX5_CAP_VDPA_EMULATION); + if (err) + return err; + } + + return 0; +} + +int mlx5_init_one_light(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + int err; + + dev->state = MLX5_DEVICE_STATE_UP; + err = mlx5_function_enable(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); + if (err) { + mlx5_core_warn(dev, "mlx5_function_enable err=%d\n", err); + goto out; + } + + err = mlx5_query_hca_caps_light(dev); + if (err) { + mlx5_core_warn(dev, "mlx5_query_hca_caps_light err=%d\n", err); + goto query_hca_caps_err; + } + + devl_lock(devlink); + err = mlx5_devlink_params_register(priv_to_devlink(dev)); + devl_unlock(devlink); + if (err) { + mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err); + goto query_hca_caps_err; + } + + return 0; + +query_hca_caps_err: + mlx5_function_disable(dev, true); +out: + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + return err; +} + +void mlx5_uninit_one_light(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + + devl_lock(devlink); + mlx5_devlink_params_unregister(priv_to_devlink(dev)); + devl_unlock(devlink); + if (dev->state != MLX5_DEVICE_STATE_UP) + return; + mlx5_function_disable(dev, true); } + +/* xxx_light() functions are used to configure the device without a full + * init (light init). E.g., there is no point in reloading a device to the + * light state. Hence, mlx5_load_one_light() isn't needed.
+ */ + +void mlx5_unload_one_light(struct mlx5_core_dev *dev) +{ + if (dev->state != MLX5_DEVICE_STATE_UP) + return; + mlx5_function_disable(dev, false); + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; +} + static const int types[] = { MLX5_CAP_GENERAL, MLX5_CAP_GENERAL_2, @@ -1809,7 +1950,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_drain_fw_reset(dev); mlx5_drain_health_wq(dev); devlink_unregister(devlink); - mlx5_sriov_disable(pdev); + mlx5_sriov_disable(pdev, false); mlx5_thermal_uninit(dev); mlx5_crdump_disable(dev); mlx5_uninit_one(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 1d879374acaa..464c6885a01c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -195,7 +195,7 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); int mlx5_sriov_attach(struct mlx5_core_dev *dev); void mlx5_sriov_detach(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); -void mlx5_sriov_disable(struct pci_dev *pdev); +void mlx5_sriov_disable(struct pci_dev *pdev, bool num_vf_change); int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); @@ -240,11 +240,14 @@ int mlx5_attach_device(struct mlx5_core_dev *dev); void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend); int mlx5_register_device(struct mlx5_core_dev *dev); void mlx5_unregister_device(struct mlx5_core_dev *dev); +void mlx5_dev_set_lightweight(struct mlx5_core_dev *dev); +bool mlx5_dev_is_lightweight(struct mlx5_core_dev *dev); struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev); void mlx5_dev_list_lock(void); void mlx5_dev_list_unlock(void); int mlx5_dev_list_trylock(void); +void mlx5_fw_reporters_create(struct mlx5_core_dev *dev); int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size); int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); @@ -319,16 +322,20 @@ static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev) int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx); void mlx5_mdev_uninit(struct mlx5_core_dev *dev); int mlx5_init_one(struct mlx5_core_dev *dev); +int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev); void mlx5_uninit_one(struct mlx5_core_dev *dev); void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend); void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend); int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery); +int mlx5_init_one_light(struct mlx5_core_dev *dev); +void mlx5_uninit_one_light(struct mlx5_core_dev *dev); +void mlx5_unload_one_light(struct mlx5_core_dev *dev); -int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 function_id, +int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, u16 opmod); -#define mlx5_vport_get_other_func_general_cap(dev, fid, out) \ - mlx5_vport_get_other_func_cap(dev, fid, out, MLX5_CAP_GENERAL) +#define mlx5_vport_get_other_func_general_cap(dev, vport, out) \ + mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL) void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, 
struct work_struct *work); static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) @@ -343,4 +350,24 @@ bool mlx5_rdma_supported(struct mlx5_core_dev *dev); bool mlx5_vnet_supported(struct mlx5_core_dev *dev); bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev); +static inline u16 mlx5_core_ec_vf_vport_base(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN_2(dev, ec_vf_vport_base); +} + +static inline u16 mlx5_core_ec_sriov_enabled(const struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf(dev) && mlx5_core_ec_vf_vport_base(dev); +} + +static inline bool mlx5_core_is_ec_vf_vport(const struct mlx5_core_dev *dev, u16 vport_num) +{ + int base_vport = mlx5_core_ec_vf_vport_base(dev); + int max_vport = base_vport + mlx5_core_max_ec_vfs(dev); + + if (!mlx5_core_ec_sriov_enabled(dev)) + return false; + + return (vport_num >= base_vport && vport_num < max_vport); +} #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 95dc67fb3001..dcf58efac159 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -79,7 +79,13 @@ static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_funct if (!func_id) return mlx5_core_is_ecpf(dev) && !ec_function ? MLX5_HOST_PF : MLX5_PF; - return func_id <= mlx5_core_max_vfs(dev) ? MLX5_VF : MLX5_SF; + if (func_id <= max(mlx5_core_max_vfs(dev), mlx5_core_max_ec_vfs(dev))) { + if (ec_function) + return MLX5_EC_VF; + else + return MLX5_VF; + } + return MLX5_SF; } static u32 mlx5_get_ec_function(u32 function) @@ -730,6 +736,9 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) WARN(dev->priv.page_counters[MLX5_HOST_PF], "External host PF FW pages counter is %d after reclaiming all pages\n", dev->priv.page_counters[MLX5_HOST_PF]); + WARN(dev->priv.page_counters[MLX5_EC_VF], + "EC VFs FW pages counter is %d after reclaiming all pages\n", + dev->priv.page_counters[MLX5_EC_VF]); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 843da89a9035..b2dbae763ca6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -41,6 +41,15 @@ struct mlx5_irq_table { struct mlx5_irq_pool *sf_comp_pool; }; +static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev, + int func, + bool ec_vf_func) +{ + if (!ec_vf_func) + return func; + return mlx5_core_ec_vf_vport_base(dev) + func - 1; +} + /** * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors * to be assigned to each VF.
@@ -79,6 +88,8 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); void *hca_cap = NULL, *query_cap = NULL, *cap; int num_vf_msix, min_msix, max_msix; + bool ec_vf_function; + int vport; int ret; num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); @@ -104,7 +115,9 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, goto out; } - ret = mlx5_vport_get_other_func_general_cap(dev, function_id, query_cap); + ec_vf_function = mlx5_core_ec_sriov_enabled(dev); + vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function); + ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap); if (ret) goto out; @@ -115,6 +128,7 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1); + MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function); MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id); MLX5_SET(set_hca_cap_in, hca_cap, op_mod, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 0692363cf80e..8fe82f1191bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -3,6 +3,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/device.h> +#include <linux/mlx5/eswitch.h> #include "mlx5_core.h" #include "dev.h" #include "devlink.h" @@ -28,6 +29,10 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia mdev->priv.adev_idx = adev->id; sf_dev->mdev = mdev; + /* Only local SFs do light probe */ + if (MLX5_ESWITCH_MANAGER(sf_dev->parent_mdev)) + mlx5_dev_set_lightweight(mdev); + err = mlx5_mdev_init(mdev, MLX5_SF_PROF); if (err) { mlx5_core_warn(mdev, "mlx5_mdev_init on err=%d\n", err); @@ -41,7 +46,10 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia goto remap_err; } - err = mlx5_init_one(mdev); + if (MLX5_ESWITCH_MANAGER(sf_dev->parent_mdev)) + err = mlx5_init_one_light(mdev); + else + err = mlx5_init_one(mdev); if (err) { mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err); goto init_one_err; @@ -65,7 +73,10 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev) mlx5_drain_health_wq(sf_dev->mdev); devlink_unregister(devlink); - mlx5_uninit_one(sf_dev->mdev); + if (mlx5_dev_is_lightweight(sf_dev->mdev)) + mlx5_uninit_one_light(sf_dev->mdev); + else + mlx5_uninit_one(sf_dev->mdev); iounmap(sf_dev->mdev->iseg); mlx5_mdev_uninit(sf_dev->mdev); mlx5_devlink_free(devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index f07d00929162..4e42a3b9b8ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -37,7 +37,7 @@ #include "mlx5_irq.h" #include "eswitch.h" -static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) +static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf, u16 func_id) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; struct mlx5_hca_vport_context *in; @@ -59,7 +59,7 @@ static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID | !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY; - err = mlx5_core_modify_hca_vport_context(dev, 1, 1, vf + 1, in); + err = mlx5_core_modify_hca_vport_context(dev, 1, 
1, func_id, in); if (err) mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf); @@ -73,6 +73,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; int err, vf, num_msix_count; + int vport_num; err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs); if (err) { @@ -104,7 +105,10 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) sriov->vfs_ctx[vf].enabled = 1; if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { - err = sriov_restore_guids(dev, vf); + vport_num = mlx5_core_ec_sriov_enabled(dev) ? + mlx5_core_ec_vf_vport_base(dev) + vf + : vf + 1; + err = sriov_restore_guids(dev, vf, vport_num); if (err) { mlx5_core_warn(dev, "failed to restore VF %d settings, err %d\n", @@ -119,9 +123,11 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) } static void -mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) +mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf, bool num_vf_change) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; + bool wait_for_ec_vf_pages = true; + bool wait_for_vf_pages = true; int err; int vf; @@ -143,11 +149,30 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf); + /* There are a number of scenarios when SRIOV is being disabled: + * 1. VFs or ECVFs had been created, and now set back to 0 (num_vf_change == true). + * - If EC SRIOV is enabled then this flow is happening on the + * embedded platform, wait for only EC VF pages. + * - If EC SRIOV is not enabled this flow is happening on non-embedded + * platform, wait for the VF pages. + * + * 2. The driver is being unloaded. In this case wait for all pages. 
+ */ + if (num_vf_change) { + if (mlx5_core_ec_sriov_enabled(dev)) + wait_for_vf_pages = false; + else + wait_for_ec_vf_pages = false; + } + + if (wait_for_ec_vf_pages && mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_EC_VF])) + mlx5_core_warn(dev, "timeout reclaiming EC VFs pages\n"); + /* For ECPFs, skip waiting for host VF pages until ECPF is destroyed */ if (mlx5_core_is_ecpf(dev)) return; - if (mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF])) + if (wait_for_vf_pages && mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF])) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } @@ -168,12 +193,12 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) err = pci_enable_sriov(pdev, num_vfs); if (err) { mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); - mlx5_device_disable_sriov(dev, num_vfs, true); + mlx5_device_disable_sriov(dev, num_vfs, true, true); } return err; } -void mlx5_sriov_disable(struct pci_dev *pdev) +void mlx5_sriov_disable(struct pci_dev *pdev, bool num_vf_change) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); @@ -181,7 +206,7 @@ void mlx5_sriov_disable(struct pci_dev *pdev) pci_disable_sriov(pdev); devl_lock(devlink); - mlx5_device_disable_sriov(dev, num_vfs, true); + mlx5_device_disable_sriov(dev, num_vfs, true, num_vf_change); devl_unlock(devlink); } @@ -196,7 +221,7 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) if (num_vfs) err = mlx5_sriov_enable(pdev, num_vfs); else - mlx5_sriov_disable(pdev); + mlx5_sriov_disable(pdev, true); if (!err) sriov->num_vfs = num_vfs; @@ -241,7 +266,7 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return; - mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false); + mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false, false); } static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) @@ -280,6 +305,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) total_vfs = pci_sriov_get_totalvfs(pdev); sriov->max_vfs = mlx5_get_max_vfs(dev); sriov->num_vfs = pci_num_vf(pdev); + sriov->max_ec_vfs = mlx5_core_ec_sriov_enabled(dev) ? pci_sriov_get_totalvfs(dev->pdev) : 0; sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index bc66b078a8a1..6d3984dd5b21 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1161,23 +1161,32 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); -int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out, +static int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vport, bool ec_vf_func) +{ + return ec_vf_func ? 
vport - mlx5_core_ec_vf_vport_base(dev) + : vport; +} + +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out, u16 opmod) { + bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {}; opmod = (opmod << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, op_mod, opmod); - MLX5_SET(query_hca_cap_in, in, function_id, function_id); + MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(dev, vport, ec_vf_func)); MLX5_SET(query_hca_cap_in, in, other_function, true); + MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); } EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap); int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, - u16 function_id, u16 opmod) + u16 vport, u16 opmod) { + bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); void *set_hca_cap; void *set_ctx; @@ -1191,8 +1200,10 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap MLX5_SET(set_hca_cap_in, set_ctx, op_mod, opmod << 1); set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); memcpy(set_hca_cap, hca_cap, MLX5_ST_SZ_BYTES(cmd_hca_cap)); - MLX5_SET(set_hca_cap_in, set_ctx, function_id, function_id); + MLX5_SET(set_hca_cap_in, set_ctx, function_id, + mlx5_vport_to_func_id(dev, vport, ec_vf_func)); MLX5_SET(set_hca_cap_in, set_ctx, other_function, true); + MLX5_SET(set_hca_cap_in, set_ctx, ec_vf_function, ec_vf_func); ret = mlx5_cmd_exec_in(dev, set_hca_cap, set_ctx); kfree(set_ctx); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9a744c48eec2..18a608a1f567 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -474,6 +474,7 @@ struct mlx5_core_sriov { struct mlx5_vf_context *vfs_ctx; int num_vfs; u16 max_vfs; + u16 max_ec_vfs; }; struct mlx5_fc_pool { @@ -580,6 +581,7 @@ enum mlx5_func_type { MLX5_VF, MLX5_SF, MLX5_HOST_PF, + MLX5_EC_VF, MLX5_FUNC_TYPE_NUM, }; @@ -1244,6 +1246,11 @@ static inline u16 mlx5_core_max_vfs(const struct mlx5_core_dev *dev) return dev->priv.sriov.max_vfs; } +static inline u16 mlx5_core_max_ec_vfs(const struct mlx5_core_dev *dev) +{ + return dev->priv.sriov.max_ec_vfs; +} + static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index af3a92ad2e6b..1f4f62cb9f34 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1992,7 +1992,10 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 ts_cqe_metadata_size2wqe_counter[0x5]; u8 reserved_at_250[0x10]; - u8 reserved_at_260[0x5a0]; + u8 reserved_at_260[0x120]; + u8 reserved_at_380[0x10]; + u8 ec_vf_vport_base[0x10]; + u8 reserved_at_3a0[0x460]; }; enum mlx5_ifc_flow_destination_type { @@ -4805,7 +4808,8 @@ struct mlx5_ifc_set_hca_cap_in_bits { u8 op_mod[0x10]; u8 other_function[0x1]; - u8 reserved_at_41[0xf]; + u8 ec_vf_function[0x1]; + u8 reserved_at_42[0xe]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -5956,7 +5960,8 @@ struct mlx5_ifc_query_hca_cap_in_bits { u8 op_mod[0x10]; u8 other_function[0x1]; - u8 reserved_at_41[0xf]; + u8 ec_vf_function[0x1]; + u8 reserved_at_42[0xe]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 7f31432f44c2..fbb9bf447889 100644 --- 
a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -132,6 +132,6 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev); -int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out, +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out, u16 opmod); #endif /* __MLX5_VPORT_H__ */
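To make the EC VF vport arithmetic used throughout this patch concrete, below is a minimal user-space sketch of the two mappings it relies on: the [base, base + max_ec_vfs) range check behind mlx5_core_is_ec_vf_vport(), and the 1-based function-id to vport translation in mlx5_core_func_to_vport(). The fake_dev struct and the values 0x8000 and 4 are illustrative assumptions only; in the driver the base comes from the ec_vf_vport_base HCA capability and the count from mlx5_core_max_ec_vfs().

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for driver state; the values used in main() are
 * made up. In the patch, the base is MLX5_CAP_GEN_2(dev, ec_vf_vport_base)
 * and the count is dev->priv.sriov.max_ec_vfs.
 */
struct fake_dev {
	uint16_t ec_vf_vport_base; /* first EC VF vport number */
	uint16_t max_ec_vfs;       /* number of EC VFs */
};

/* Mirrors mlx5_core_is_ec_vf_vport(): an EC VF vport lies in
 * [base, base + max_ec_vfs); a zero base means EC SRIOV is disabled.
 */
static bool is_ec_vf_vport(const struct fake_dev *dev, uint16_t vport_num)
{
	uint16_t base = dev->ec_vf_vport_base;

	if (!base)
		return false;
	return vport_num >= base && vport_num < base + dev->max_ec_vfs;
}

/* Mirrors mlx5_core_func_to_vport() from pci_irq.c: EC VF function ids
 * are 1-based, so function 1 lands on the base vport.
 */
static uint16_t func_to_vport(const struct fake_dev *dev, uint16_t func,
			      bool ec_vf_func)
{
	if (!ec_vf_func)
		return func;
	return dev->ec_vf_vport_base + func - 1;
}

int main(void)
{
	struct fake_dev dev = { .ec_vf_vport_base = 0x8000, .max_ec_vfs = 4 };
	uint16_t func;

	for (func = 1; func <= dev.max_ec_vfs; func++) {
		uint16_t vport = func_to_vport(&dev, func, true);

		printf("EC VF func %u -> vport 0x%x (is_ec_vf_vport=%d)\n",
		       func, vport, is_ec_vf_vport(&dev, vport));
	}
	return 0;
}

Built with any C99 compiler, this prints the function-id/vport pairing for four hypothetical EC VFs; the same 1-based convention is why the helpers treat vport base + N - 1 as EC VF function N.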