diff options
Diffstat (limited to 'drivers/net/ethernet/intel/ice/devlink')
-rw-r--r-- | drivers/net/ethernet/intel/ice/devlink/devlink.c | 2090 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ice/devlink/devlink.h | 26 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ice/devlink/health.c | 550 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ice/devlink/health.h | 71 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ice/devlink/port.c | 999 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ice/devlink/port.h | 58 |
6 files changed, 3794 insertions, 0 deletions
diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c new file mode 100644 index 000000000000..4af60e2f37df --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -0,0 +1,2090 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Intel Corporation. */ + +#include <linux/vmalloc.h> + +#include "ice.h" +#include "ice_lib.h" +#include "devlink.h" +#include "port.h" +#include "ice_eswitch.h" +#include "ice_fw_update.h" +#include "ice_dcb_lib.h" +#include "ice_sf_eth.h" + +/* context for devlink info version reporting */ +struct ice_info_ctx { + char buf[128]; + struct ice_orom_info pending_orom; + struct ice_nvm_info pending_nvm; + struct ice_netlist_info pending_netlist; + struct ice_hw_dev_caps dev_caps; +}; + +/* The following functions are used to format specific strings for various + * devlink info versions. The ctx parameter is used to provide the storage + * buffer, as well as any ancillary information calculated when the info + * request was made. + * + * If a version does not exist, for example when attempting to get the + * inactive version of flash when there is no pending update, the function + * should leave the buffer in the ctx structure empty. + */ + +static void ice_info_get_dsn(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + u8 dsn[8]; + + /* Copy the DSN into an array in Big Endian format */ + put_unaligned_be64(pci_get_dsn(pf->pdev), dsn); + + snprintf(ctx->buf, sizeof(ctx->buf), "%8phD", dsn); +} + +static void ice_info_pba(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_hw *hw = &pf->hw; + int status; + + status = ice_read_pba_string(hw, (u8 *)ctx->buf, sizeof(ctx->buf)); + if (status) + /* We failed to locate the PBA, so just skip this entry */ + dev_dbg(ice_pf_to_dev(pf), "Failed to read Product Board Assembly string, status %d\n", + status); +} + +static void ice_info_fw_mgmt(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_hw *hw = &pf->hw; + + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", + hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch); +} + +static void ice_info_fw_api(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_hw *hw = &pf->hw; + + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", hw->api_maj_ver, + hw->api_min_ver, hw->api_patch); +} + +static void ice_info_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_hw *hw = &pf->hw; + + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", hw->fw_build); +} + +static void ice_info_orom_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_orom_info *orom = &pf->hw.flash.orom; + + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", + orom->major, orom->build, orom->patch); +} + +static void +ice_info_pending_orom_ver(struct ice_pf __always_unused *pf, + struct ice_info_ctx *ctx) +{ + struct ice_orom_info *orom = &ctx->pending_orom; + + if (ctx->dev_caps.common_cap.nvm_update_pending_orom) + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", + orom->major, orom->build, orom->patch); +} + +static void ice_info_nvm_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_nvm_info *nvm = &pf->hw.flash.nvm; + + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", nvm->major, nvm->minor); +} + +static void +ice_info_pending_nvm_ver(struct ice_pf __always_unused *pf, + struct ice_info_ctx *ctx) +{ + struct ice_nvm_info *nvm = &ctx->pending_nvm; + + if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", + nvm->major, nvm->minor); +} + +static void ice_info_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_nvm_info *nvm = &pf->hw.flash.nvm; + + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack); +} + +static void +ice_info_pending_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_nvm_info *nvm = &ctx->pending_nvm; + + if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack); +} + +static void ice_info_ddp_pkg_name(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_hw *hw = &pf->hw; + + snprintf(ctx->buf, sizeof(ctx->buf), "%s", hw->active_pkg_name); +} + +static void +ice_info_ddp_pkg_version(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_pkg_ver *pkg = &pf->hw.active_pkg_ver; + + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u.%u", + pkg->major, pkg->minor, pkg->update, pkg->draft); +} + +static void +ice_info_ddp_pkg_bundle_id(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", pf->hw.active_track_id); +} + +static void ice_info_netlist_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_netlist_info *netlist = &pf->hw.flash.netlist; + + /* The netlist version fields are BCD formatted */ + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x", + netlist->major, netlist->minor, + netlist->type >> 16, netlist->type & 0xFFFF, + netlist->rev, netlist->cust_ver); +} + +static void ice_info_netlist_build(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_netlist_info *netlist = &pf->hw.flash.netlist; + + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash); +} + +static void +ice_info_pending_netlist_ver(struct ice_pf __always_unused *pf, + struct ice_info_ctx *ctx) +{ + struct ice_netlist_info *netlist = &ctx->pending_netlist; + + /* The netlist version fields are BCD formatted */ + if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x", + netlist->major, netlist->minor, + netlist->type >> 16, netlist->type & 0xFFFF, + netlist->rev, netlist->cust_ver); +} + +static void +ice_info_pending_netlist_build(struct ice_pf __always_unused *pf, + struct ice_info_ctx *ctx) +{ + struct ice_netlist_info *netlist = &ctx->pending_netlist; + + if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash); +} + +static void ice_info_cgu_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + u32 id, cfg_ver, fw_ver; + + if (!ice_is_feature_supported(pf, ICE_F_CGU)) + return; + if (ice_aq_get_cgu_info(&pf->hw, &id, &cfg_ver, &fw_ver)) + return; + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", id, cfg_ver, fw_ver); +} + +static void ice_info_cgu_id(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + if (!ice_is_feature_supported(pf, ICE_F_CGU)) + return; + snprintf(ctx->buf, sizeof(ctx->buf), "%u", pf->hw.cgu_part_number); +} + +#define fixed(key, getter) { ICE_VERSION_FIXED, key, getter, NULL } +#define running(key, getter) { ICE_VERSION_RUNNING, key, getter, NULL } +#define stored(key, getter, fallback) { ICE_VERSION_STORED, key, getter, fallback } + +/* The combined() macro inserts both the running entry as well as a stored + * entry. The running entry will always report the version from the active + * handler. The stored entry will first try the pending handler, and fallback + * to the active handler if the pending function does not report a version. + * The pending handler should check the status of a pending update for the + * relevant flash component. It should only fill in the buffer in the case + * where a valid pending version is available. This ensures that the related + * stored and running versions remain in sync, and that stored versions are + * correctly reported as expected. + */ +#define combined(key, active, pending) \ + running(key, active), \ + stored(key, pending, active) + +enum ice_version_type { + ICE_VERSION_FIXED, + ICE_VERSION_RUNNING, + ICE_VERSION_STORED, +}; + +static const struct ice_devlink_version { + enum ice_version_type type; + const char *key; + void (*getter)(struct ice_pf *pf, struct ice_info_ctx *ctx); + void (*fallback)(struct ice_pf *pf, struct ice_info_ctx *ctx); +} ice_devlink_versions[] = { + fixed(DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, ice_info_pba), + running(DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, ice_info_fw_mgmt), + running("fw.mgmt.api", ice_info_fw_api), + running("fw.mgmt.build", ice_info_fw_build), + combined(DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, ice_info_orom_ver, ice_info_pending_orom_ver), + combined("fw.psid.api", ice_info_nvm_ver, ice_info_pending_nvm_ver), + combined(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack, ice_info_pending_eetrack), + running("fw.app.name", ice_info_ddp_pkg_name), + running(DEVLINK_INFO_VERSION_GENERIC_FW_APP, ice_info_ddp_pkg_version), + running("fw.app.bundle_id", ice_info_ddp_pkg_bundle_id), + combined("fw.netlist", ice_info_netlist_ver, ice_info_pending_netlist_ver), + combined("fw.netlist.build", ice_info_netlist_build, ice_info_pending_netlist_build), + fixed("cgu.id", ice_info_cgu_id), + running("fw.cgu", ice_info_cgu_fw_build), +}; + +/** + * ice_devlink_info_get - .info_get devlink handler + * @devlink: devlink instance structure + * @req: the devlink info request + * @extack: extended netdev ack structure + * + * Callback for the devlink .info_get operation. Reports information about the + * device. + * + * Return: zero on success or an error code on failure. + */ +static int ice_devlink_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + struct ice_info_ctx *ctx; + size_t i; + int err; + + err = ice_wait_for_reset(pf, 10 * HZ); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Device is busy resetting"); + return err; + } + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + /* discover capabilities first */ + err = ice_discover_dev_caps(hw, &ctx->dev_caps); + if (err) { + dev_dbg(dev, "Failed to discover device capabilities, status %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + NL_SET_ERR_MSG_MOD(extack, "Unable to discover device capabilities"); + goto out_free_ctx; + } + + if (ctx->dev_caps.common_cap.nvm_update_pending_orom) { + err = ice_get_inactive_orom_ver(hw, &ctx->pending_orom); + if (err) { + dev_dbg(dev, "Unable to read inactive Option ROM version data, status %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + + /* disable display of pending Option ROM */ + ctx->dev_caps.common_cap.nvm_update_pending_orom = false; + } + } + + if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) { + err = ice_get_inactive_nvm_ver(hw, &ctx->pending_nvm); + if (err) { + dev_dbg(dev, "Unable to read inactive NVM version data, status %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + + /* disable display of pending Option ROM */ + ctx->dev_caps.common_cap.nvm_update_pending_nvm = false; + } + } + + if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) { + err = ice_get_inactive_netlist_ver(hw, &ctx->pending_netlist); + if (err) { + dev_dbg(dev, "Unable to read inactive Netlist version data, status %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + + /* disable display of pending Option ROM */ + ctx->dev_caps.common_cap.nvm_update_pending_netlist = false; + } + } + + ice_info_get_dsn(pf, ctx); + + err = devlink_info_serial_number_put(req, ctx->buf); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to set serial number"); + goto out_free_ctx; + } + + for (i = 0; i < ARRAY_SIZE(ice_devlink_versions); i++) { + enum ice_version_type type = ice_devlink_versions[i].type; + const char *key = ice_devlink_versions[i].key; + + memset(ctx->buf, 0, sizeof(ctx->buf)); + + ice_devlink_versions[i].getter(pf, ctx); + + /* If the default getter doesn't report a version, use the + * fallback function. This is primarily useful in the case of + * "stored" versions that want to report the same value as the + * running version in the normal case of no pending update. + */ + if (ctx->buf[0] == '\0' && ice_devlink_versions[i].fallback) + ice_devlink_versions[i].fallback(pf, ctx); + + /* Do not report missing versions */ + if (ctx->buf[0] == '\0') + continue; + + switch (type) { + case ICE_VERSION_FIXED: + err = devlink_info_version_fixed_put(req, key, ctx->buf); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to set fixed version"); + goto out_free_ctx; + } + break; + case ICE_VERSION_RUNNING: + err = devlink_info_version_running_put_ext(req, key, + ctx->buf, + DEVLINK_INFO_VERSION_TYPE_COMPONENT); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to set running version"); + goto out_free_ctx; + } + break; + case ICE_VERSION_STORED: + err = devlink_info_version_stored_put_ext(req, key, + ctx->buf, + DEVLINK_INFO_VERSION_TYPE_COMPONENT); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to set stored version"); + goto out_free_ctx; + } + break; + } + } + +out_free_ctx: + kfree(ctx); + return err; +} + +/** + * ice_devlink_reload_empr_start - Start EMP reset to activate new firmware + * @pf: pointer to the pf instance + * @extack: netlink extended ACK structure + * + * Allow user to activate new Embedded Management Processor firmware by + * issuing device specific EMP reset. Called in response to + * a DEVLINK_CMD_RELOAD with the DEVLINK_RELOAD_ACTION_FW_ACTIVATE. + * + * Note that teardown and rebuild of the driver state happens automatically as + * part of an interrupt and watchdog task. This is because all physical + * functions on the device must be able to reset when an EMP reset occurs from + * any source. + */ +static int +ice_devlink_reload_empr_start(struct ice_pf *pf, + struct netlink_ext_ack *extack) +{ + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + u8 pending; + int err; + + err = ice_get_pending_updates(pf, &pending, extack); + if (err) + return err; + + /* pending is a bitmask of which flash banks have a pending update, + * including the main NVM bank, the Option ROM bank, and the netlist + * bank. If any of these bits are set, then there is a pending update + * waiting to be activated. + */ + if (!pending) { + NL_SET_ERR_MSG_MOD(extack, "No pending firmware update"); + return -ECANCELED; + } + + if (pf->fw_emp_reset_disabled) { + NL_SET_ERR_MSG_MOD(extack, "EMP reset is not available. To activate firmware, a reboot or power cycle is needed"); + return -ECANCELED; + } + + dev_dbg(dev, "Issuing device EMP reset to activate firmware\n"); + + err = ice_aq_nvm_update_empr(hw); + if (err) { + dev_err(dev, "Failed to trigger EMP device reset to reload firmware, err %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + NL_SET_ERR_MSG_MOD(extack, "Failed to trigger EMP device reset to reload firmware"); + return err; + } + + return 0; +} + +/** + * ice_devlink_reinit_down - unload given PF + * @pf: pointer to the PF struct + */ +static void ice_devlink_reinit_down(struct ice_pf *pf) +{ + /* No need to take devl_lock, it's already taken by devlink API */ + ice_unload(pf); + rtnl_lock(); + ice_vsi_decfg(ice_get_main_vsi(pf)); + rtnl_unlock(); + ice_deinit_dev(pf); +} + +/** + * ice_devlink_reload_down - prepare for reload + * @devlink: pointer to the devlink instance to reload + * @netns_change: if true, the network namespace is changing + * @action: the action to perform + * @limit: limits on what reload should do, such as not resetting + * @extack: netlink extended ACK structure + */ +static int +ice_devlink_reload_down(struct devlink *devlink, bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + if (ice_is_eswitch_mode_switchdev(pf)) { + NL_SET_ERR_MSG_MOD(extack, + "Go to legacy mode before doing reinit"); + return -EOPNOTSUPP; + } + if (ice_is_adq_active(pf)) { + NL_SET_ERR_MSG_MOD(extack, + "Turn off ADQ before doing reinit"); + return -EOPNOTSUPP; + } + if (ice_has_vfs(pf)) { + NL_SET_ERR_MSG_MOD(extack, + "Remove all VFs before doing reinit"); + return -EOPNOTSUPP; + } + ice_devlink_reinit_down(pf); + return 0; + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: + return ice_devlink_reload_empr_start(pf, extack); + default: + WARN_ON(1); + return -EOPNOTSUPP; + } +} + +/** + * ice_devlink_reload_empr_finish - Wait for EMP reset to finish + * @pf: pointer to the pf instance + * @extack: netlink extended ACK structure + * + * Wait for driver to finish rebuilding after EMP reset is completed. This + * includes time to wait for both the actual device reset as well as the time + * for the driver's rebuild to complete. + */ +static int +ice_devlink_reload_empr_finish(struct ice_pf *pf, + struct netlink_ext_ack *extack) +{ + int err; + + err = ice_wait_for_reset(pf, 60 * HZ); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Device still resetting after 1 minute"); + return err; + } + + return 0; +} + +/** + * ice_get_tx_topo_user_sel - Read user's choice from flash + * @pf: pointer to pf structure + * @layers: value read from flash will be saved here + * + * Reads user's preference for Tx Scheduler Topology Tree from PFA TLV. + * + * Return: zero when read was successful, negative values otherwise. + */ +static int ice_get_tx_topo_user_sel(struct ice_pf *pf, uint8_t *layers) +{ + struct ice_aqc_nvm_tx_topo_user_sel usr_sel = {}; + struct ice_hw *hw = &pf->hw; + int err; + + err = ice_acquire_nvm(hw, ICE_RES_READ); + if (err) + return err; + + err = ice_aq_read_nvm(hw, ICE_AQC_NVM_TX_TOPO_MOD_ID, 0, + sizeof(usr_sel), &usr_sel, true, true, NULL); + if (err) + goto exit_release_res; + + if (usr_sel.data & ICE_AQC_NVM_TX_TOPO_USER_SEL) + *layers = ICE_SCHED_5_LAYERS; + else + *layers = ICE_SCHED_9_LAYERS; + +exit_release_res: + ice_release_nvm(hw); + + return err; +} + +/** + * ice_update_tx_topo_user_sel - Save user's preference in flash + * @pf: pointer to pf structure + * @layers: value to be saved in flash + * + * Variable "layers" defines user's preference about number of layers in Tx + * Scheduler Topology Tree. This choice should be stored in PFA TLV field + * and be picked up by driver, next time during init. + * + * Return: zero when save was successful, negative values otherwise. + */ +static int ice_update_tx_topo_user_sel(struct ice_pf *pf, int layers) +{ + struct ice_aqc_nvm_tx_topo_user_sel usr_sel = {}; + struct ice_hw *hw = &pf->hw; + int err; + + err = ice_acquire_nvm(hw, ICE_RES_WRITE); + if (err) + return err; + + err = ice_aq_read_nvm(hw, ICE_AQC_NVM_TX_TOPO_MOD_ID, 0, + sizeof(usr_sel), &usr_sel, true, true, NULL); + if (err) + goto exit_release_res; + + if (layers == ICE_SCHED_5_LAYERS) + usr_sel.data |= ICE_AQC_NVM_TX_TOPO_USER_SEL; + else + usr_sel.data &= ~ICE_AQC_NVM_TX_TOPO_USER_SEL; + + err = ice_write_one_nvm_block(pf, ICE_AQC_NVM_TX_TOPO_MOD_ID, 2, + sizeof(usr_sel.data), &usr_sel.data, + true, NULL, NULL); +exit_release_res: + ice_release_nvm(hw); + + return err; +} + +/** + * ice_devlink_tx_sched_layers_get - Get tx_scheduling_layers parameter + * @devlink: pointer to the devlink instance + * @id: the parameter ID to set + * @ctx: context to store the parameter value + * + * Return: zero on success and negative value on failure. + */ +static int ice_devlink_tx_sched_layers_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct ice_pf *pf = devlink_priv(devlink); + int err; + + err = ice_get_tx_topo_user_sel(pf, &ctx->val.vu8); + if (err) + return err; + + return 0; +} + +/** + * ice_devlink_tx_sched_layers_set - Set tx_scheduling_layers parameter + * @devlink: pointer to the devlink instance + * @id: the parameter ID to set + * @ctx: context to get the parameter value + * @extack: netlink extended ACK structure + * + * Return: zero on success and negative value on failure. + */ +static int ice_devlink_tx_sched_layers_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + int err; + + err = ice_update_tx_topo_user_sel(pf, ctx->val.vu8); + if (err) + return err; + + NL_SET_ERR_MSG_MOD(extack, + "Tx scheduling layers have been changed on this device. You must do the PCI slot powercycle for the change to take effect."); + + return 0; +} + +/** + * ice_devlink_tx_sched_layers_validate - Validate passed tx_scheduling_layers + * parameter value + * @devlink: unused pointer to devlink instance + * @id: the parameter ID to validate + * @val: value to validate + * @extack: netlink extended ACK structure + * + * Supported values are: + * - 5 - five layers Tx Scheduler Topology Tree + * - 9 - nine layers Tx Scheduler Topology Tree + * + * Return: zero when passed parameter value is supported. Negative value on + * error. + */ +static int ice_devlink_tx_sched_layers_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + if (val.vu8 != ICE_SCHED_5_LAYERS && val.vu8 != ICE_SCHED_9_LAYERS) { + NL_SET_ERR_MSG_MOD(extack, + "Wrong number of tx scheduler layers provided."); + return -EINVAL; + } + + return 0; +} + +/** + * ice_tear_down_devlink_rate_tree - removes devlink-rate exported tree + * @pf: pf struct + * + * This function tears down tree exported during VF's creation. + */ +void ice_tear_down_devlink_rate_tree(struct ice_pf *pf) +{ + struct devlink *devlink; + struct ice_vf *vf; + unsigned int bkt; + + devlink = priv_to_devlink(pf); + + devl_lock(devlink); + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { + if (vf->devlink_port.devlink_rate) + devl_rate_leaf_destroy(&vf->devlink_port); + } + mutex_unlock(&pf->vfs.table_lock); + + devl_rate_nodes_destroy(devlink); + devl_unlock(devlink); +} + +/** + * ice_enable_custom_tx - try to enable custom Tx feature + * @pf: pf struct + * + * This function tries to enable custom Tx feature, + * it's not possible to enable it, if DCB or ADQ is active. + */ +static bool ice_enable_custom_tx(struct ice_pf *pf) +{ + struct ice_port_info *pi = ice_get_main_vsi(pf)->port_info; + struct device *dev = ice_pf_to_dev(pf); + + if (pi->is_custom_tx_enabled) + /* already enabled, return true */ + return true; + + if (ice_is_adq_active(pf)) { + dev_err(dev, "ADQ active, can't modify Tx scheduler tree\n"); + return false; + } + + if (ice_is_dcb_active(pf)) { + dev_err(dev, "DCB active, can't modify Tx scheduler tree\n"); + return false; + } + + pi->is_custom_tx_enabled = true; + + return true; +} + +/** + * ice_traverse_tx_tree - traverse Tx scheduler tree + * @devlink: devlink struct + * @node: current node, used for recursion + * @tc_node: tc_node struct, that is treated as a root + * @pf: pf struct + * + * This function traverses Tx scheduler tree and exports + * entire structure to the devlink-rate. + */ +static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node *node, + struct ice_sched_node *tc_node, struct ice_pf *pf) +{ + struct devlink_rate *rate_node = NULL; + struct ice_dynamic_port *sf; + struct ice_vf *vf; + int i; + + if (node->rate_node) + /* already added, skip to the next */ + goto traverse_children; + + if (node->parent == tc_node) { + /* create root node */ + rate_node = devl_rate_node_create(devlink, node, node->name, NULL); + } else if (node->vsi_handle && + pf->vsi[node->vsi_handle]->type == ICE_VSI_VF && + pf->vsi[node->vsi_handle]->vf) { + vf = pf->vsi[node->vsi_handle]->vf; + if (!vf->devlink_port.devlink_rate) + /* leaf nodes doesn't have children + * so we don't set rate_node + */ + devl_rate_leaf_create(&vf->devlink_port, node, + node->parent->rate_node); + } else if (node->vsi_handle && + pf->vsi[node->vsi_handle]->type == ICE_VSI_SF && + pf->vsi[node->vsi_handle]->sf) { + sf = pf->vsi[node->vsi_handle]->sf; + if (!sf->devlink_port.devlink_rate) + /* leaf nodes doesn't have children + * so we don't set rate_node + */ + devl_rate_leaf_create(&sf->devlink_port, node, + node->parent->rate_node); + } else if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF && + node->parent->rate_node) { + rate_node = devl_rate_node_create(devlink, node, node->name, + node->parent->rate_node); + } + + if (rate_node && !IS_ERR(rate_node)) + node->rate_node = rate_node; + +traverse_children: + for (i = 0; i < node->num_children; i++) + ice_traverse_tx_tree(devlink, node->children[i], tc_node, pf); +} + +/** + * ice_devlink_rate_init_tx_topology - export Tx scheduler tree to devlink rate + * @devlink: devlink struct + * @vsi: main vsi struct + * + * This function finds a root node, then calls ice_traverse_tx tree, which + * traverses the tree and exports it's contents to devlink rate. + */ +int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi) +{ + struct ice_port_info *pi = vsi->port_info; + struct ice_sched_node *tc_node; + struct ice_pf *pf = vsi->back; + int i; + + tc_node = pi->root->children[0]; + mutex_lock(&pi->sched_lock); + for (i = 0; i < tc_node->num_children; i++) + ice_traverse_tx_tree(devlink, tc_node->children[i], tc_node, pf); + mutex_unlock(&pi->sched_lock); + + return 0; +} + +static void ice_clear_rate_nodes(struct ice_sched_node *node) +{ + node->rate_node = NULL; + + for (int i = 0; i < node->num_children; i++) + ice_clear_rate_nodes(node->children[i]); +} + +/** + * ice_devlink_rate_clear_tx_topology - clear node->rate_node + * @vsi: main vsi struct + * + * Clear rate_node to cleanup creation of Tx topology. + * + */ +void ice_devlink_rate_clear_tx_topology(struct ice_vsi *vsi) +{ + struct ice_port_info *pi = vsi->port_info; + + mutex_lock(&pi->sched_lock); + ice_clear_rate_nodes(pi->root->children[0]); + mutex_unlock(&pi->sched_lock); +} + +/** + * ice_set_object_tx_share - sets node scheduling parameter + * @pi: devlink struct instance + * @node: node struct instance + * @bw: bandwidth in bytes per second + * @extack: extended netdev ack structure + * + * This function sets ICE_MIN_BW scheduling BW limit. + */ +static int ice_set_object_tx_share(struct ice_port_info *pi, struct ice_sched_node *node, + u64 bw, struct netlink_ext_ack *extack) +{ + int status; + + mutex_lock(&pi->sched_lock); + /* converts bytes per second to kilo bits per second */ + node->tx_share = div_u64(bw, 125); + status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW, node->tx_share); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_share"); + + return status; +} + +/** + * ice_set_object_tx_max - sets node scheduling parameter + * @pi: devlink struct instance + * @node: node struct instance + * @bw: bandwidth in bytes per second + * @extack: extended netdev ack structure + * + * This function sets ICE_MAX_BW scheduling BW limit. + */ +static int ice_set_object_tx_max(struct ice_port_info *pi, struct ice_sched_node *node, + u64 bw, struct netlink_ext_ack *extack) +{ + int status; + + mutex_lock(&pi->sched_lock); + /* converts bytes per second value to kilo bits per second */ + node->tx_max = div_u64(bw, 125); + status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW, node->tx_max); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_max"); + + return status; +} + +/** + * ice_set_object_tx_priority - sets node scheduling parameter + * @pi: devlink struct instance + * @node: node struct instance + * @priority: value representing priority for strict priority arbitration + * @extack: extended netdev ack structure + * + * This function sets priority of node among siblings. + */ +static int ice_set_object_tx_priority(struct ice_port_info *pi, struct ice_sched_node *node, + u32 priority, struct netlink_ext_ack *extack) +{ + int status; + + if (priority >= 8) { + NL_SET_ERR_MSG_MOD(extack, "Priority should be less than 8"); + return -EINVAL; + } + + mutex_lock(&pi->sched_lock); + node->tx_priority = priority; + status = ice_sched_set_node_priority(pi, node, node->tx_priority); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_priority"); + + return status; +} + +/** + * ice_set_object_tx_weight - sets node scheduling parameter + * @pi: devlink struct instance + * @node: node struct instance + * @weight: value represeting relative weight for WFQ arbitration + * @extack: extended netdev ack structure + * + * This function sets node weight for WFQ algorithm. + */ +static int ice_set_object_tx_weight(struct ice_port_info *pi, struct ice_sched_node *node, + u32 weight, struct netlink_ext_ack *extack) +{ + int status; + + if (weight > 200 || weight < 1) { + NL_SET_ERR_MSG_MOD(extack, "Weight must be between 1 and 200"); + return -EINVAL; + } + + mutex_lock(&pi->sched_lock); + node->tx_weight = weight; + status = ice_sched_set_node_weight(pi, node, node->tx_weight); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_weight"); + + return status; +} + +/** + * ice_get_pi_from_dev_rate - get port info from devlink_rate + * @rate_node: devlink struct instance + * + * This function returns corresponding port_info struct of devlink_rate + */ +static struct ice_port_info *ice_get_pi_from_dev_rate(struct devlink_rate *rate_node) +{ + struct ice_pf *pf = devlink_priv(rate_node->devlink); + + return ice_get_main_vsi(pf)->port_info; +} + +static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, + struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node; + struct ice_port_info *pi; + + pi = ice_get_pi_from_dev_rate(rate_node); + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + /* preallocate memory for ice_sched_node */ + node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + *priv = node; + + return 0; +} + +static int ice_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, + struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node, *tc_node; + struct ice_port_info *pi; + + pi = ice_get_pi_from_dev_rate(rate_node); + tc_node = pi->root->children[0]; + node = priv; + + if (!rate_node->parent || !node || tc_node == node || !extack) + return 0; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + /* can't allow to delete a node with children */ + if (node->num_children) + return -EINVAL; + + mutex_lock(&pi->sched_lock); + ice_free_sched_node(pi, node); + mutex_unlock(&pi->sched_lock); + + return 0; +} + +static int ice_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_leaf), + node, tx_max, extack); +} + +static int ice_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_leaf), node, + tx_share, extack); +} + +static int ice_devlink_rate_leaf_tx_priority_set(struct devlink_rate *rate_leaf, void *priv, + u32 tx_priority, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_leaf), node, + tx_priority, extack); +} + +static int ice_devlink_rate_leaf_tx_weight_set(struct devlink_rate *rate_leaf, void *priv, + u32 tx_weight, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_leaf), node, + tx_weight, extack); +} + +static int ice_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_node), + node, tx_max, extack); +} + +static int ice_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_node), + node, tx_share, extack); +} + +static int ice_devlink_rate_node_tx_priority_set(struct devlink_rate *rate_node, void *priv, + u32 tx_priority, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_node), + node, tx_priority, extack); +} + +static int ice_devlink_rate_node_tx_weight_set(struct devlink_rate *rate_node, void *priv, + u32 tx_weight, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_node), + node, tx_weight, extack); +} + +static int ice_devlink_set_parent(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack) +{ + struct ice_port_info *pi = ice_get_pi_from_dev_rate(devlink_rate); + struct ice_sched_node *tc_node, *node, *parent_node; + u16 num_nodes_added; + u32 first_node_teid; + u32 node_teid; + int status; + + tc_node = pi->root->children[0]; + node = priv; + + if (!extack) + return 0; + + if (!ice_enable_custom_tx(devlink_priv(devlink_rate->devlink))) + return -EBUSY; + + if (!parent) { + if (!node || tc_node == node || node->num_children) + return -EINVAL; + + mutex_lock(&pi->sched_lock); + ice_free_sched_node(pi, node); + mutex_unlock(&pi->sched_lock); + + return 0; + } + + parent_node = parent_priv; + + /* if the node doesn't exist, create it */ + if (!node->parent) { + mutex_lock(&pi->sched_lock); + status = ice_sched_add_elems(pi, tc_node, parent_node, + parent_node->tx_sched_layer + 1, + 1, &num_nodes_added, &first_node_teid, + &node); + mutex_unlock(&pi->sched_lock); + + if (status) { + NL_SET_ERR_MSG_MOD(extack, "Can't add a new node"); + return status; + } + + if (devlink_rate->tx_share) + ice_set_object_tx_share(pi, node, devlink_rate->tx_share, extack); + if (devlink_rate->tx_max) + ice_set_object_tx_max(pi, node, devlink_rate->tx_max, extack); + if (devlink_rate->tx_priority) + ice_set_object_tx_priority(pi, node, devlink_rate->tx_priority, extack); + if (devlink_rate->tx_weight) + ice_set_object_tx_weight(pi, node, devlink_rate->tx_weight, extack); + } else { + node_teid = le32_to_cpu(node->info.node_teid); + mutex_lock(&pi->sched_lock); + status = ice_sched_move_nodes(pi, parent_node, 1, &node_teid); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't move existing node to a new parent"); + } + + return status; +} + +static void ice_set_min_max_msix(struct ice_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + union devlink_param_value val; + int err; + + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, + &val); + if (!err) + pf->msix.min = val.vu32; + + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, + &val); + if (!err) + pf->msix.max = val.vu32; +} + +/** + * ice_devlink_reinit_up - do reinit of the given PF + * @pf: pointer to the PF struct + */ +static int ice_devlink_reinit_up(struct ice_pf *pf) +{ + struct ice_vsi *vsi = ice_get_main_vsi(pf); + int err; + + err = ice_init_hw(&pf->hw); + if (err) { + dev_err(ice_pf_to_dev(pf), "ice_init_hw failed: %d\n", err); + return err; + } + + /* load MSI-X values */ + ice_set_min_max_msix(pf); + + err = ice_init_dev(pf); + if (err) + goto unroll_hw_init; + + vsi->flags = ICE_VSI_FLAG_INIT; + + rtnl_lock(); + err = ice_vsi_cfg(vsi); + rtnl_unlock(); + if (err) + goto err_vsi_cfg; + + /* No need to take devl_lock, it's already taken by devlink API */ + err = ice_load(pf); + if (err) + goto err_load; + + return 0; + +err_load: + rtnl_lock(); + ice_vsi_decfg(vsi); + rtnl_unlock(); +err_vsi_cfg: + ice_deinit_dev(pf); +unroll_hw_init: + ice_deinit_hw(&pf->hw); + return err; +} + +/** + * ice_devlink_reload_up - do reload up after reinit + * @devlink: pointer to the devlink instance reloading + * @action: the action requested + * @limit: limits imposed by userspace, such as not resetting + * @actions_performed: on return, indicate what actions actually performed + * @extack: netlink extended ACK structure + */ +static int +ice_devlink_reload_up(struct devlink *devlink, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + u32 *actions_performed, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); + return ice_devlink_reinit_up(pf); + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: + *actions_performed = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE); + return ice_devlink_reload_empr_finish(pf, extack); + default: + WARN_ON(1); + return -EOPNOTSUPP; + } +} + +static const struct devlink_ops ice_devlink_ops = { + .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), + .reload_down = ice_devlink_reload_down, + .reload_up = ice_devlink_reload_up, + .eswitch_mode_get = ice_eswitch_mode_get, + .eswitch_mode_set = ice_eswitch_mode_set, + .info_get = ice_devlink_info_get, + .flash_update = ice_devlink_flash_update, + + .rate_node_new = ice_devlink_rate_node_new, + .rate_node_del = ice_devlink_rate_node_del, + + .rate_leaf_tx_max_set = ice_devlink_rate_leaf_tx_max_set, + .rate_leaf_tx_share_set = ice_devlink_rate_leaf_tx_share_set, + .rate_leaf_tx_priority_set = ice_devlink_rate_leaf_tx_priority_set, + .rate_leaf_tx_weight_set = ice_devlink_rate_leaf_tx_weight_set, + + .rate_node_tx_max_set = ice_devlink_rate_node_tx_max_set, + .rate_node_tx_share_set = ice_devlink_rate_node_tx_share_set, + .rate_node_tx_priority_set = ice_devlink_rate_node_tx_priority_set, + .rate_node_tx_weight_set = ice_devlink_rate_node_tx_weight_set, + + .rate_leaf_parent_set = ice_devlink_set_parent, + .rate_node_parent_set = ice_devlink_set_parent, + + .port_new = ice_devlink_port_new, +}; + +static const struct devlink_ops ice_sf_devlink_ops; + +static int +ice_devlink_enable_roce_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct ice_pf *pf = devlink_priv(devlink); + struct iidc_rdma_core_dev_info *cdev; + + cdev = pf->cdev_info; + if (!cdev) + return -ENODEV; + + ctx->val.vbool = !!(cdev->rdma_protocol & IIDC_RDMA_PROTOCOL_ROCEV2); + + return 0; +} + +static int ice_devlink_enable_roce_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + struct iidc_rdma_core_dev_info *cdev; + bool roce_ena = ctx->val.vbool; + int ret; + + cdev = pf->cdev_info; + if (!cdev) + return -ENODEV; + + if (!roce_ena) { + ice_unplug_aux_dev(pf); + cdev->rdma_protocol &= ~IIDC_RDMA_PROTOCOL_ROCEV2; + return 0; + } + + cdev->rdma_protocol |= IIDC_RDMA_PROTOCOL_ROCEV2; + ret = ice_plug_aux_dev(pf); + if (ret) + cdev->rdma_protocol &= ~IIDC_RDMA_PROTOCOL_ROCEV2; + + return ret; +} + +static int +ice_devlink_enable_roce_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + struct iidc_rdma_core_dev_info *cdev; + + cdev = pf->cdev_info; + if (!cdev) + return -ENODEV; + + if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) + return -EOPNOTSUPP; + + if (cdev->rdma_protocol & IIDC_RDMA_PROTOCOL_IWARP) { + NL_SET_ERR_MSG_MOD(extack, "iWARP is currently enabled. This device cannot enable iWARP and RoCEv2 simultaneously"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int +ice_devlink_enable_iw_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct ice_pf *pf = devlink_priv(devlink); + struct iidc_rdma_core_dev_info *cdev; + + cdev = pf->cdev_info; + if (!cdev) + return -ENODEV; + + ctx->val.vbool = !!(cdev->rdma_protocol & IIDC_RDMA_PROTOCOL_IWARP); + + return 0; +} + +static int ice_devlink_enable_iw_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + struct iidc_rdma_core_dev_info *cdev; + bool iw_ena = ctx->val.vbool; + int ret; + + cdev = pf->cdev_info; + if (!cdev) + return -ENODEV; + + if (!iw_ena) { + ice_unplug_aux_dev(pf); + cdev->rdma_protocol &= ~IIDC_RDMA_PROTOCOL_IWARP; + return 0; + } + + cdev->rdma_protocol |= IIDC_RDMA_PROTOCOL_IWARP; + ret = ice_plug_aux_dev(pf); + if (ret) + cdev->rdma_protocol &= ~IIDC_RDMA_PROTOCOL_IWARP; + + return ret; +} + +static int +ice_devlink_enable_iw_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + + if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) + return -EOPNOTSUPP; + + if (pf->cdev_info->rdma_protocol & IIDC_RDMA_PROTOCOL_ROCEV2) { + NL_SET_ERR_MSG_MOD(extack, "RoCEv2 is currently enabled. This device cannot enable iWARP and RoCEv2 simultaneously"); + return -EOPNOTSUPP; + } + + return 0; +} + +#define DEVLINK_LOCAL_FWD_DISABLED_STR "disabled" +#define DEVLINK_LOCAL_FWD_ENABLED_STR "enabled" +#define DEVLINK_LOCAL_FWD_PRIORITIZED_STR "prioritized" + +/** + * ice_devlink_local_fwd_mode_to_str - Get string for local_fwd mode. + * @mode: local forwarding for mode used in port_info struct. + * + * Return: Mode respective string or "Invalid". + */ +static const char * +ice_devlink_local_fwd_mode_to_str(enum ice_local_fwd_mode mode) +{ + switch (mode) { + case ICE_LOCAL_FWD_MODE_ENABLED: + return DEVLINK_LOCAL_FWD_ENABLED_STR; + case ICE_LOCAL_FWD_MODE_PRIORITIZED: + return DEVLINK_LOCAL_FWD_PRIORITIZED_STR; + case ICE_LOCAL_FWD_MODE_DISABLED: + return DEVLINK_LOCAL_FWD_DISABLED_STR; + } + + return "Invalid"; +} + +/** + * ice_devlink_local_fwd_str_to_mode - Get local_fwd mode from string name. + * @mode_str: local forwarding mode string. + * + * Return: Mode value or negative number if invalid. + */ +static int ice_devlink_local_fwd_str_to_mode(const char *mode_str) +{ + if (!strcmp(mode_str, DEVLINK_LOCAL_FWD_ENABLED_STR)) + return ICE_LOCAL_FWD_MODE_ENABLED; + else if (!strcmp(mode_str, DEVLINK_LOCAL_FWD_PRIORITIZED_STR)) + return ICE_LOCAL_FWD_MODE_PRIORITIZED; + else if (!strcmp(mode_str, DEVLINK_LOCAL_FWD_DISABLED_STR)) + return ICE_LOCAL_FWD_MODE_DISABLED; + + return -EINVAL; +} + +/** + * ice_devlink_local_fwd_get - Get local_fwd parameter. + * @devlink: Pointer to the devlink instance. + * @id: The parameter ID to set. + * @ctx: Context to store the parameter value. + * + * Return: Zero. + */ +static int ice_devlink_local_fwd_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct ice_pf *pf = devlink_priv(devlink); + struct ice_port_info *pi; + const char *mode_str; + + pi = pf->hw.port_info; + mode_str = ice_devlink_local_fwd_mode_to_str(pi->local_fwd_mode); + snprintf(ctx->val.vstr, sizeof(ctx->val.vstr), "%s", mode_str); + + return 0; +} + +/** + * ice_devlink_local_fwd_set - Set local_fwd parameter. + * @devlink: Pointer to the devlink instance. + * @id: The parameter ID to set. + * @ctx: Context to get the parameter value. + * @extack: Netlink extended ACK structure. + * + * Return: Zero. + */ +static int ice_devlink_local_fwd_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + int new_local_fwd_mode = ice_devlink_local_fwd_str_to_mode(ctx->val.vstr); + struct ice_pf *pf = devlink_priv(devlink); + struct device *dev = ice_pf_to_dev(pf); + struct ice_port_info *pi; + + pi = pf->hw.port_info; + if (pi->local_fwd_mode != new_local_fwd_mode) { + pi->local_fwd_mode = new_local_fwd_mode; + dev_info(dev, "Setting local_fwd to %s\n", ctx->val.vstr); + ice_schedule_reset(pf, ICE_RESET_CORER); + } + + return 0; +} + +/** + * ice_devlink_local_fwd_validate - Validate passed local_fwd parameter value. + * @devlink: Unused pointer to devlink instance. + * @id: The parameter ID to validate. + * @val: Value to validate. + * @extack: Netlink extended ACK structure. + * + * Supported values are: + * "enabled" - local_fwd is enabled, "disabled" - local_fwd is disabled + * "prioritized" - local_fwd traffic is prioritized in scheduling. + * + * Return: Zero when passed parameter value is supported. Negative value on + * error. + */ +static int ice_devlink_local_fwd_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + if (ice_devlink_local_fwd_str_to_mode(val.vstr) < 0) { + NL_SET_ERR_MSG_MOD(extack, "Error: Requested value is not supported."); + return -EINVAL; + } + + return 0; +} + +static int +ice_devlink_msix_max_pf_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + + if (val.vu32 > pf->hw.func_caps.common_cap.num_msix_vectors) + return -EINVAL; + + return 0; +} + +static int +ice_devlink_msix_min_pf_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + if (val.vu32 < ICE_MIN_MSIX) + return -EINVAL; + + return 0; +} + +static int ice_devlink_enable_rdma_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + bool new_state = val.vbool; + + if (new_state && !test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) + return -EOPNOTSUPP; + + return 0; +} + +enum ice_param_id { + ICE_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS, + ICE_DEVLINK_PARAM_ID_LOCAL_FWD, +}; + +static const struct devlink_param ice_dvl_rdma_params[] = { + DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_RUNTIME), + ice_devlink_enable_roce_get, + ice_devlink_enable_roce_set, + ice_devlink_enable_roce_validate), + DEVLINK_PARAM_GENERIC(ENABLE_IWARP, BIT(DEVLINK_PARAM_CMODE_RUNTIME), + ice_devlink_enable_iw_get, + ice_devlink_enable_iw_set, + ice_devlink_enable_iw_validate), + DEVLINK_PARAM_GENERIC(ENABLE_RDMA, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, ice_devlink_enable_rdma_validate), +}; + +static const struct devlink_param ice_dvl_msix_params[] = { + DEVLINK_PARAM_GENERIC(MSIX_VEC_PER_PF_MAX, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, ice_devlink_msix_max_pf_validate), + DEVLINK_PARAM_GENERIC(MSIX_VEC_PER_PF_MIN, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, ice_devlink_msix_min_pf_validate), +}; + +static const struct devlink_param ice_dvl_sched_params[] = { + DEVLINK_PARAM_DRIVER(ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS, + "tx_scheduling_layers", + DEVLINK_PARAM_TYPE_U8, + BIT(DEVLINK_PARAM_CMODE_PERMANENT), + ice_devlink_tx_sched_layers_get, + ice_devlink_tx_sched_layers_set, + ice_devlink_tx_sched_layers_validate), + DEVLINK_PARAM_DRIVER(ICE_DEVLINK_PARAM_ID_LOCAL_FWD, + "local_forwarding", DEVLINK_PARAM_TYPE_STRING, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + ice_devlink_local_fwd_get, + ice_devlink_local_fwd_set, + ice_devlink_local_fwd_validate), +}; + +static void ice_devlink_free(void *devlink_ptr) +{ + devlink_free((struct devlink *)devlink_ptr); +} + +/** + * ice_allocate_pf - Allocate devlink and return PF structure pointer + * @dev: the device to allocate for + * + * Allocate a devlink instance for this device and return the private area as + * the PF structure. The devlink memory is kept track of through devres by + * adding an action to remove it when unwinding. + */ +struct ice_pf *ice_allocate_pf(struct device *dev) +{ + struct devlink *devlink; + + devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev); + if (!devlink) + return NULL; + + /* Add an action to teardown the devlink when unwinding the driver */ + if (devm_add_action_or_reset(dev, ice_devlink_free, devlink)) + return NULL; + + return devlink_priv(devlink); +} + +/** + * ice_allocate_sf - Allocate devlink and return SF structure pointer + * @dev: the device to allocate for + * @pf: pointer to the PF structure + * + * Allocate a devlink instance for SF. + * + * Return: ice_sf_priv pointer to allocated memory or ERR_PTR in case of error + */ +struct ice_sf_priv *ice_allocate_sf(struct device *dev, struct ice_pf *pf) +{ + struct devlink *devlink; + int err; + + devlink = devlink_alloc(&ice_sf_devlink_ops, sizeof(struct ice_sf_priv), + dev); + if (!devlink) + return ERR_PTR(-ENOMEM); + + err = devl_nested_devlink_set(priv_to_devlink(pf), devlink); + if (err) { + devlink_free(devlink); + return ERR_PTR(err); + } + + return devlink_priv(devlink); +} + +/** + * ice_devlink_register - Register devlink interface for this PF + * @pf: the PF to register the devlink for. + * + * Register the devlink instance associated with this physical function. + * + * Return: zero on success or an error code on failure. + */ +void ice_devlink_register(struct ice_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + + devl_register(devlink); +} + +/** + * ice_devlink_unregister - Unregister devlink resources for this PF. + * @pf: the PF structure to cleanup + * + * Releases resources used by devlink and cleans up associated memory. + */ +void ice_devlink_unregister(struct ice_pf *pf) +{ + devl_unregister(priv_to_devlink(pf)); +} + +int ice_devlink_register_params(struct ice_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + union devlink_param_value value; + struct ice_hw *hw = &pf->hw; + int status; + + status = devl_params_register(devlink, ice_dvl_rdma_params, + ARRAY_SIZE(ice_dvl_rdma_params)); + if (status) + return status; + + status = devl_params_register(devlink, ice_dvl_msix_params, + ARRAY_SIZE(ice_dvl_msix_params)); + if (status) + goto unregister_rdma_params; + + if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en) + status = devl_params_register(devlink, ice_dvl_sched_params, + ARRAY_SIZE(ice_dvl_sched_params)); + if (status) + goto unregister_msix_params; + + value.vu32 = pf->msix.max; + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, + value); + value.vu32 = pf->msix.min; + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, + value); + + value.vbool = test_bit(ICE_FLAG_RDMA_ENA, pf->flags); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + value); + + return 0; + +unregister_msix_params: + devl_params_unregister(devlink, ice_dvl_msix_params, + ARRAY_SIZE(ice_dvl_msix_params)); +unregister_rdma_params: + devl_params_unregister(devlink, ice_dvl_rdma_params, + ARRAY_SIZE(ice_dvl_rdma_params)); + return status; +} + +void ice_devlink_unregister_params(struct ice_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + struct ice_hw *hw = &pf->hw; + + devl_params_unregister(devlink, ice_dvl_rdma_params, + ARRAY_SIZE(ice_dvl_rdma_params)); + devl_params_unregister(devlink, ice_dvl_msix_params, + ARRAY_SIZE(ice_dvl_msix_params)); + + if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en) + devl_params_unregister(devlink, ice_dvl_sched_params, + ARRAY_SIZE(ice_dvl_sched_params)); +} + +#define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024) + +static const struct devlink_region_ops ice_nvm_region_ops; +static const struct devlink_region_ops ice_sram_region_ops; + +/** + * ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents + * @devlink: the devlink instance + * @ops: the devlink region to snapshot + * @extack: extended ACK response structure + * @data: on exit points to snapshot data buffer + * + * This function is called in response to a DEVLINK_CMD_REGION_NEW for either + * the nvm-flash or shadow-ram region. + * + * It captures a snapshot of the NVM or Shadow RAM flash contents. This + * snapshot can then later be viewed via the DEVLINK_CMD_REGION_READ netlink + * interface. + * + * @returns zero on success, and updates the data pointer. Returns a non-zero + * error code on failure. + */ +static int ice_devlink_nvm_snapshot(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, u8 **data) +{ + struct ice_pf *pf = devlink_priv(devlink); + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + bool read_shadow_ram; + u8 *nvm_data, *tmp, i; + u32 nvm_size, left; + s8 num_blks; + int status; + + if (ops == &ice_nvm_region_ops) { + read_shadow_ram = false; + nvm_size = hw->flash.flash_size; + } else if (ops == &ice_sram_region_ops) { + read_shadow_ram = true; + nvm_size = hw->flash.sr_words * 2u; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function"); + return -EOPNOTSUPP; + } + + nvm_data = vzalloc(nvm_size); + if (!nvm_data) + return -ENOMEM; + + num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE); + tmp = nvm_data; + left = nvm_size; + + /* Some systems take longer to read the NVM than others which causes the + * FW to reclaim the NVM lock before the entire NVM has been read. Fix + * this by breaking the reads of the NVM into smaller chunks that will + * probably not take as long. This has some overhead since we are + * increasing the number of AQ commands, but it should always work + */ + for (i = 0; i < num_blks; i++) { + u32 read_sz = min_t(u32, ICE_DEVLINK_READ_BLK_SIZE, left); + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) { + dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); + vfree(nvm_data); + return -EIO; + } + + status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE, + &read_sz, tmp, read_shadow_ram); + if (status) { + dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", + read_sz, status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); + ice_release_nvm(hw); + vfree(nvm_data); + return -EIO; + } + ice_release_nvm(hw); + + tmp += read_sz; + left -= read_sz; + } + + *data = nvm_data; + + return 0; +} + +/** + * ice_devlink_nvm_read - Read a portion of NVM flash contents + * @devlink: the devlink instance + * @ops: the devlink region to snapshot + * @extack: extended ACK response structure + * @offset: the offset to start at + * @size: the amount to read + * @data: the data buffer to read into + * + * This function is called in response to DEVLINK_CMD_REGION_READ to directly + * read a section of the NVM contents. + * + * It reads from either the nvm-flash or shadow-ram region contents. + * + * @returns zero on success, and updates the data pointer. Returns a non-zero + * error code on failure. + */ +static int ice_devlink_nvm_read(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data) +{ + struct ice_pf *pf = devlink_priv(devlink); + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + bool read_shadow_ram; + u64 nvm_size; + int status; + + if (ops == &ice_nvm_region_ops) { + read_shadow_ram = false; + nvm_size = hw->flash.flash_size; + } else if (ops == &ice_sram_region_ops) { + read_shadow_ram = true; + nvm_size = hw->flash.sr_words * 2u; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function"); + return -EOPNOTSUPP; + } + + if (offset + size >= nvm_size) { + NL_SET_ERR_MSG_MOD(extack, "Cannot read beyond the region size"); + return -ERANGE; + } + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) { + dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); + return -EIO; + } + + status = ice_read_flat_nvm(hw, (u32)offset, &size, data, + read_shadow_ram); + if (status) { + dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", + size, status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); + ice_release_nvm(hw); + return -EIO; + } + ice_release_nvm(hw); + + return 0; +} + +/** + * ice_devlink_devcaps_snapshot - Capture snapshot of device capabilities + * @devlink: the devlink instance + * @ops: the devlink region being snapshotted + * @extack: extended ACK response structure + * @data: on exit points to snapshot data buffer + * + * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for + * the device-caps devlink region. It captures a snapshot of the device + * capabilities reported by firmware. + * + * @returns zero on success, and updates the data pointer. Returns a non-zero + * error code on failure. + */ +static int +ice_devlink_devcaps_snapshot(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, u8 **data) +{ + struct ice_pf *pf = devlink_priv(devlink); + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + void *devcaps; + int status; + + devcaps = vzalloc(ICE_AQ_MAX_BUF_LEN); + if (!devcaps) + return -ENOMEM; + + status = ice_aq_list_caps(hw, devcaps, ICE_AQ_MAX_BUF_LEN, NULL, + ice_aqc_opc_list_dev_caps, NULL); + if (status) { + dev_dbg(dev, "ice_aq_list_caps: failed to read device capabilities, err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to read device capabilities"); + vfree(devcaps); + return status; + } + + *data = (u8 *)devcaps; + + return 0; +} + +static const struct devlink_region_ops ice_nvm_region_ops = { + .name = "nvm-flash", + .destructor = vfree, + .snapshot = ice_devlink_nvm_snapshot, + .read = ice_devlink_nvm_read, +}; + +static const struct devlink_region_ops ice_sram_region_ops = { + .name = "shadow-ram", + .destructor = vfree, + .snapshot = ice_devlink_nvm_snapshot, + .read = ice_devlink_nvm_read, +}; + +static const struct devlink_region_ops ice_devcaps_region_ops = { + .name = "device-caps", + .destructor = vfree, + .snapshot = ice_devlink_devcaps_snapshot, +}; + +/** + * ice_devlink_init_regions - Initialize devlink regions + * @pf: the PF device structure + * + * Create devlink regions used to enable access to dump the contents of the + * flash memory on the device. + */ +void ice_devlink_init_regions(struct ice_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + struct device *dev = ice_pf_to_dev(pf); + u64 nvm_size, sram_size; + + nvm_size = pf->hw.flash.flash_size; + pf->nvm_region = devl_region_create(devlink, &ice_nvm_region_ops, 1, + nvm_size); + if (IS_ERR(pf->nvm_region)) { + dev_err(dev, "failed to create NVM devlink region, err %ld\n", + PTR_ERR(pf->nvm_region)); + pf->nvm_region = NULL; + } + + sram_size = pf->hw.flash.sr_words * 2u; + pf->sram_region = devl_region_create(devlink, &ice_sram_region_ops, + 1, sram_size); + if (IS_ERR(pf->sram_region)) { + dev_err(dev, "failed to create shadow-ram devlink region, err %ld\n", + PTR_ERR(pf->sram_region)); + pf->sram_region = NULL; + } + + pf->devcaps_region = devl_region_create(devlink, + &ice_devcaps_region_ops, 10, + ICE_AQ_MAX_BUF_LEN); + if (IS_ERR(pf->devcaps_region)) { + dev_err(dev, "failed to create device-caps devlink region, err %ld\n", + PTR_ERR(pf->devcaps_region)); + pf->devcaps_region = NULL; + } +} + +/** + * ice_devlink_destroy_regions - Destroy devlink regions + * @pf: the PF device structure + * + * Remove previously created regions for this PF. + */ +void ice_devlink_destroy_regions(struct ice_pf *pf) +{ + if (pf->nvm_region) + devl_region_destroy(pf->nvm_region); + + if (pf->sram_region) + devl_region_destroy(pf->sram_region); + + if (pf->devcaps_region) + devl_region_destroy(pf->devcaps_region); +} diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.h b/drivers/net/ethernet/intel/ice/devlink/devlink.h new file mode 100644 index 000000000000..1af3b0763fbb --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. */ + +#ifndef _ICE_DEVLINK_H_ +#define _ICE_DEVLINK_H_ + +struct ice_pf *ice_allocate_pf(struct device *dev); +struct ice_sf_priv *ice_allocate_sf(struct device *dev, struct ice_pf *pf); + +void ice_devlink_register(struct ice_pf *pf); +void ice_devlink_unregister(struct ice_pf *pf); +int ice_devlink_register_params(struct ice_pf *pf); +void ice_devlink_unregister_params(struct ice_pf *pf); +int ice_devlink_create_pf_port(struct ice_pf *pf); +void ice_devlink_destroy_pf_port(struct ice_pf *pf); +int ice_devlink_create_vf_port(struct ice_vf *vf); +void ice_devlink_destroy_vf_port(struct ice_vf *vf); + +void ice_devlink_init_regions(struct ice_pf *pf); +void ice_devlink_destroy_regions(struct ice_pf *pf); + +int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi); +void ice_tear_down_devlink_rate_tree(struct ice_pf *pf); +void ice_devlink_rate_clear_tx_topology(struct ice_vsi *vsi); + +#endif /* _ICE_DEVLINK_H_ */ diff --git a/drivers/net/ethernet/intel/ice/devlink/health.c b/drivers/net/ethernet/intel/ice/devlink/health.c new file mode 100644 index 000000000000..19c3d37aa768 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/health.c @@ -0,0 +1,550 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Intel Corporation. */ + +#include "ice.h" +#include "ice_adminq_cmd.h" /* for enum ice_aqc_health_status_elem */ +#include "health.h" + +#define ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, obj, name) \ + devlink_fmsg_put(fmsg, #name, (obj)->name) + +#define ICE_HEALTH_STATUS_DATA_SIZE 2 + +struct ice_health_status { + enum ice_aqc_health_status code; + const char *description; + const char *solution; + const char *data_label[ICE_HEALTH_STATUS_DATA_SIZE]; +}; + +/* + * In addition to the health status codes provided below, the firmware might + * generate Health Status Codes that are not pertinent to the end-user. + * For instance, Health Code 0x1002 is triggered when the command fails. + * Such codes should be disregarded by the end-user. + * The below lookup requires to be sorted by code. + */ + +static const char ice_common_port_solutions[] = + "Check your cable connection. Change or replace the module or cable. Manually set speed and duplex."; +static const char ice_port_number_label[] = "Port Number"; +static const char ice_update_nvm_solution[] = "Update to the latest NVM image."; + +static const struct ice_health_status ice_health_status_lookup[] = { + {ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT, "An unsupported module was detected.", + ice_common_port_solutions, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE, "Module type is not supported.", + "Change or replace the module or cable.", {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL, "Module is not qualified.", + ice_common_port_solutions, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM, + "Device cannot communicate with the module.", + "Check your cable connection. Change or replace the module or cable. Manually set speed and duplex.", + {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT, "Unresolved module conflict.", + "Manually set speed/duplex or change the port option. If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.", + {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT, "Module is not present.", + "Check that the module is inserted correctly. If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.", + {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED, "Underutilized module.", + "Change or replace the module or cable. Change the port option.", + {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT, "An unsupported module was detected.", + ice_common_port_solutions, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG, "Invalid link configuration.", + NULL, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS, "Port hardware access error.", + ice_update_nvm_solution, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE, "A port is unreachable.", + "Change the port option. Update to the latest NVM image."}, + {ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED, "Port speed is limited due to module.", + "Change the module or configure the port option to match the current module speed. Change the port option.", + {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT, + "All configured link modes were attempted but failed to establish link. The device will restart the process to establish link.", + "Check link partner connection and configuration.", + {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED, + "Port speed is limited by PHY capabilities.", + "Change the module to align to port option.", {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO, "LOM topology netlist is corrupted.", + ice_update_nvm_solution, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_NETLIST, "Unrecoverable netlist error.", + ice_update_nvm_solution, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT, "Port topology conflict.", + "Change the port option. Update to the latest NVM image."}, + {ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS, "Unrecoverable hardware access error.", + ice_update_nvm_solution, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME, "Unrecoverable runtime error.", + ice_update_nvm_solution, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT, "Link management engine failed to initialize.", + ice_update_nvm_solution, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_PHY_FW_LOAD, + "Failed to load the firmware image in the external PHY.", + ice_update_nvm_solution, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_INFO_RECOVERY, "The device is in firmware recovery mode.", + ice_update_nvm_solution, {"Extended Error"}}, + {ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS, "The flash chip cannot be accessed.", + "If issue persists, call customer support.", {"Access Type"}}, + {ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH, "NVM authentication failed.", + ice_update_nvm_solution}, + {ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH, "Option ROM authentication failed.", + ice_update_nvm_solution}, + {ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH, "DDP package authentication failed.", + "Update to latest base driver and DDP package."}, + {ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT, "NVM image is incompatible.", + ice_update_nvm_solution}, + {ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT, "Option ROM is incompatible.", + ice_update_nvm_solution, {"Expected PCI Device ID", "Expected Module ID"}}, + {ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB, + "Supplied MIB file is invalid. DCB reverted to default configuration.", + "Disable FW-LLDP and check DCBx system configuration.", + {ice_port_number_label, "MIB ID"}}, +}; + +static int ice_health_status_lookup_compare(const void *a, const void *b) +{ + return ((struct ice_health_status *)a)->code - ((struct ice_health_status *)b)->code; +} + +static const struct ice_health_status *ice_get_health_status(u16 code) +{ + struct ice_health_status key = { .code = code }; + + return bsearch(&key, ice_health_status_lookup, ARRAY_SIZE(ice_health_status_lookup), + sizeof(struct ice_health_status), ice_health_status_lookup_compare); +} + +static void ice_describe_status_code(struct devlink_fmsg *fmsg, + struct ice_aqc_health_status_elem *hse) +{ + static const char *const aux_label[] = { "Aux Data 1", "Aux Data 2" }; + const struct ice_health_status *health_code; + u32 internal_data[2]; + u16 status_code; + + status_code = le16_to_cpu(hse->health_status_code); + + devlink_fmsg_put(fmsg, "Syndrome", status_code); + if (status_code) { + internal_data[0] = le32_to_cpu(hse->internal_data1); + internal_data[1] = le32_to_cpu(hse->internal_data2); + + health_code = ice_get_health_status(status_code); + if (!health_code) + return; + + devlink_fmsg_string_pair_put(fmsg, "Description", health_code->description); + if (health_code->solution) + devlink_fmsg_string_pair_put(fmsg, "Possible Solution", + health_code->solution); + + for (size_t i = 0; i < ICE_HEALTH_STATUS_DATA_SIZE; i++) { + if (internal_data[i] != ICE_AQC_HEALTH_STATUS_UNDEFINED_DATA) + devlink_fmsg_u32_pair_put(fmsg, + health_code->data_label[i] ? + health_code->data_label[i] : + aux_label[i], + internal_data[i]); + } + } +} + +static int +ice_port_reporter_diagnose(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_health_reporter_priv(reporter); + + ice_describe_status_code(fmsg, &pf->health_reporters.port_status); + return 0; +} + +static int +ice_port_reporter_dump(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, + void *priv_ctx, struct netlink_ext_ack __always_unused *extack) +{ + struct ice_pf *pf = devlink_health_reporter_priv(reporter); + + ice_describe_status_code(fmsg, &pf->health_reporters.port_status); + return 0; +} + +static int +ice_fw_reporter_diagnose(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_health_reporter_priv(reporter); + + ice_describe_status_code(fmsg, &pf->health_reporters.fw_status); + return 0; +} + +static int +ice_fw_reporter_dump(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, + void *priv_ctx, struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_health_reporter_priv(reporter); + + ice_describe_status_code(fmsg, &pf->health_reporters.fw_status); + return 0; +} + +static void ice_config_health_events(struct ice_pf *pf, bool enable) +{ + u8 enable_bits = 0; + int ret; + + if (enable) + enable_bits = ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK | + ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK; + + ret = ice_aq_set_health_status_cfg(&pf->hw, enable_bits); + if (ret) + dev_err(ice_pf_to_dev(pf), "Failed to %s firmware health events, err %d aq_err %s\n", + str_enable_disable(enable), ret, + ice_aq_str(pf->hw.adminq.sq_last_status)); +} + +/** + * ice_process_health_status_event - Process the health status event from FW + * @pf: pointer to the PF structure + * @event: event structure containing the Health Status Event opcode + * + * Decode the Health Status Events and print the associated messages + */ +void ice_process_health_status_event(struct ice_pf *pf, struct ice_rq_event_info *event) +{ + const struct ice_aqc_health_status_elem *health_info; + u16 count; + + health_info = (struct ice_aqc_health_status_elem *)event->msg_buf; + count = le16_to_cpu(event->desc.params.get_health_status.health_status_count); + + if (count > (event->buf_len / sizeof(*health_info))) { + dev_err(ice_pf_to_dev(pf), "Received a health status event with invalid element count\n"); + return; + } + + for (size_t i = 0; i < count; i++) { + const struct ice_health_status *health_code; + u16 status_code; + + status_code = le16_to_cpu(health_info->health_status_code); + health_code = ice_get_health_status(status_code); + + if (health_code) { + switch (le16_to_cpu(health_info->event_source)) { + case ICE_AQC_HEALTH_STATUS_GLOBAL: + pf->health_reporters.fw_status = *health_info; + devlink_health_report(pf->health_reporters.fw, + "FW syndrome reported", NULL); + break; + case ICE_AQC_HEALTH_STATUS_PF: + case ICE_AQC_HEALTH_STATUS_PORT: + pf->health_reporters.port_status = *health_info; + devlink_health_report(pf->health_reporters.port, + "Port syndrome reported", NULL); + break; + default: + dev_err(ice_pf_to_dev(pf), "Health code with unknown source\n"); + } + } else { + u32 data1, data2; + u16 source; + + source = le16_to_cpu(health_info->event_source); + data1 = le32_to_cpu(health_info->internal_data1); + data2 = le32_to_cpu(health_info->internal_data2); + dev_dbg(ice_pf_to_dev(pf), + "Received internal health status code 0x%08x, source: 0x%08x, data1: 0x%08x, data2: 0x%08x", + status_code, source, data1, data2); + } + health_info++; + } +} + +/** + * ice_devlink_health_report - boilerplate to call given @reporter + * + * @reporter: devlink health reporter to call, do nothing on NULL + * @msg: message to pass up, "event name" is fine + * @priv_ctx: typically some event struct + */ +static void ice_devlink_health_report(struct devlink_health_reporter *reporter, + const char *msg, void *priv_ctx) +{ + if (!reporter) + return; + + /* We do not do auto recovering, so return value of the below function + * will always be 0, thus we do ignore it. + */ + devlink_health_report(reporter, msg, priv_ctx); +} + +struct ice_mdd_event { + enum ice_mdd_src src; + u16 vf_num; + u16 queue; + u8 pf_num; + u8 event; +}; + +static const char *ice_mdd_src_to_str(enum ice_mdd_src src) +{ + switch (src) { + case ICE_MDD_SRC_TX_PQM: + return "tx_pqm"; + case ICE_MDD_SRC_TX_TCLAN: + return "tx_tclan"; + case ICE_MDD_SRC_TX_TDPU: + return "tx_tdpu"; + case ICE_MDD_SRC_RX: + return "rx"; + default: + return "invalid"; + } +} + +static int +ice_mdd_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct ice_mdd_event *mdd_event = priv_ctx; + const char *src; + + if (!mdd_event) + return 0; + + src = ice_mdd_src_to_str(mdd_event->src); + + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_put(fmsg, "src", src); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, pf_num); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, vf_num); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, event); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, queue); + devlink_fmsg_obj_nest_end(fmsg); + + return 0; +} + +/** + * ice_report_mdd_event - Report an MDD event through devlink health + * @pf: the PF device structure + * @src: the HW block that was the source of this MDD event + * @pf_num: the pf_num on which the MDD event occurred + * @vf_num: the vf_num on which the MDD event occurred + * @event: the event type of the MDD event + * @queue: the queue on which the MDD event occurred + * + * Report an MDD event that has occurred on this PF. + */ +void ice_report_mdd_event(struct ice_pf *pf, enum ice_mdd_src src, u8 pf_num, + u16 vf_num, u8 event, u16 queue) +{ + struct ice_mdd_event ev = { + .src = src, + .pf_num = pf_num, + .vf_num = vf_num, + .event = event, + .queue = queue, + }; + + ice_devlink_health_report(pf->health_reporters.mdd, "MDD event", &ev); +} + +/** + * ice_fmsg_put_ptr - put hex value of pointer into fmsg + * + * @fmsg: devlink fmsg under construction + * @name: name to pass + * @ptr: 64 bit value to print as hex and put into fmsg + */ +static void ice_fmsg_put_ptr(struct devlink_fmsg *fmsg, const char *name, + void *ptr) +{ + char buf[sizeof(ptr) * 3]; + + sprintf(buf, "%p", ptr); + devlink_fmsg_put(fmsg, name, buf); +} + +struct ice_tx_hang_event { + u32 head; + u32 intr; + u16 vsi_num; + u16 queue; + u16 next_to_clean; + u16 next_to_use; + struct ice_tx_ring *tx_ring; +}; + +static int ice_tx_hang_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct ice_tx_hang_event *event = priv_ctx; + struct sk_buff *skb; + + if (!event) + return 0; + + skb = event->tx_ring->tx_buf->skb; + devlink_fmsg_obj_nest_start(fmsg); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, head); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, intr); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, vsi_num); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, queue); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_clean); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_use); + devlink_fmsg_put(fmsg, "irq-mapping", event->tx_ring->q_vector->name); + ice_fmsg_put_ptr(fmsg, "desc-ptr", event->tx_ring->desc); + ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)(long)event->tx_ring->dma); + ice_fmsg_put_ptr(fmsg, "skb-ptr", skb); + devlink_fmsg_binary_pair_put(fmsg, "desc", event->tx_ring->desc, + event->tx_ring->count * sizeof(struct ice_tx_desc)); + devlink_fmsg_dump_skb(fmsg, skb); + devlink_fmsg_obj_nest_end(fmsg); + + return 0; +} + +void ice_prep_tx_hang_report(struct ice_pf *pf, struct ice_tx_ring *tx_ring, + u16 vsi_num, u32 head, u32 intr) +{ + struct ice_health_tx_hang_buf *buf = &pf->health_reporters.tx_hang_buf; + + buf->tx_ring = tx_ring; + buf->vsi_num = vsi_num; + buf->head = head; + buf->intr = intr; +} + +void ice_report_tx_hang(struct ice_pf *pf) +{ + struct ice_health_tx_hang_buf *buf = &pf->health_reporters.tx_hang_buf; + struct ice_tx_ring *tx_ring = buf->tx_ring; + + struct ice_tx_hang_event ev = { + .head = buf->head, + .intr = buf->intr, + .vsi_num = buf->vsi_num, + .queue = tx_ring->q_index, + .next_to_clean = tx_ring->next_to_clean, + .next_to_use = tx_ring->next_to_use, + .tx_ring = tx_ring, + }; + + ice_devlink_health_report(pf->health_reporters.tx_hang, "Tx hang", &ev); +} + +static struct devlink_health_reporter * +ice_init_devlink_rep(struct ice_pf *pf, + const struct devlink_health_reporter_ops *ops) +{ + struct devlink *devlink = priv_to_devlink(pf); + struct devlink_health_reporter *rep; + const u64 graceful_period = 0; + + rep = devl_health_reporter_create(devlink, ops, graceful_period, pf); + if (IS_ERR(rep)) { + struct device *dev = ice_pf_to_dev(pf); + + dev_err(dev, "failed to create devlink %s health report er", + ops->name); + return NULL; + } + return rep; +} + +#define ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field) \ + ._field = ice_##_name##_reporter_##_field, + +#define ICE_DEFINE_HEALTH_REPORTER_OPS_1(_name, _field1) \ + static const struct devlink_health_reporter_ops ice_##_name##_reporter_ops = { \ + .name = #_name, \ + ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field1) \ + } + +#define ICE_DEFINE_HEALTH_REPORTER_OPS_2(_name, _field1, _field2) \ + static const struct devlink_health_reporter_ops ice_##_name##_reporter_ops = { \ + .name = #_name, \ + ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field1) \ + ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field2) \ + } + +ICE_DEFINE_HEALTH_REPORTER_OPS_1(mdd, dump); +ICE_DEFINE_HEALTH_REPORTER_OPS_1(tx_hang, dump); +ICE_DEFINE_HEALTH_REPORTER_OPS_2(fw, dump, diagnose); +ICE_DEFINE_HEALTH_REPORTER_OPS_2(port, dump, diagnose); + +/** + * ice_health_init - allocate and init all ice devlink health reporters and + * accompanied data + * + * @pf: PF struct + */ +void ice_health_init(struct ice_pf *pf) +{ + struct ice_health *reps = &pf->health_reporters; + + reps->mdd = ice_init_devlink_rep(pf, &ice_mdd_reporter_ops); + reps->tx_hang = ice_init_devlink_rep(pf, &ice_tx_hang_reporter_ops); + + if (ice_is_fw_health_report_supported(&pf->hw)) { + reps->fw = ice_init_devlink_rep(pf, &ice_fw_reporter_ops); + reps->port = ice_init_devlink_rep(pf, &ice_port_reporter_ops); + ice_config_health_events(pf, true); + } +} + +/** + * ice_deinit_devl_reporter - destroy given devlink health reporter + * @reporter: reporter to destroy + */ +static void ice_deinit_devl_reporter(struct devlink_health_reporter *reporter) +{ + if (reporter) + devl_health_reporter_destroy(reporter); +} + +/** + * ice_health_deinit - deallocate all ice devlink health reporters and + * accompanied data + * + * @pf: PF struct + */ +void ice_health_deinit(struct ice_pf *pf) +{ + ice_deinit_devl_reporter(pf->health_reporters.mdd); + ice_deinit_devl_reporter(pf->health_reporters.tx_hang); + if (ice_is_fw_health_report_supported(&pf->hw)) { + ice_deinit_devl_reporter(pf->health_reporters.fw); + ice_deinit_devl_reporter(pf->health_reporters.port); + ice_config_health_events(pf, false); + } +} + +static +void ice_health_assign_healthy_state(struct devlink_health_reporter *reporter) +{ + if (reporter) + devlink_health_reporter_state_update(reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); +} + +/** + * ice_health_clear - clear devlink health issues after a reset + * @pf: the PF device structure + * + * Mark the PF in healthy state again after a reset has completed. + */ +void ice_health_clear(struct ice_pf *pf) +{ + ice_health_assign_healthy_state(pf->health_reporters.mdd); + ice_health_assign_healthy_state(pf->health_reporters.tx_hang); +} diff --git a/drivers/net/ethernet/intel/ice/devlink/health.h b/drivers/net/ethernet/intel/ice/devlink/health.h new file mode 100644 index 000000000000..5edfc4d2adce --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/health.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2024, Intel Corporation. */ + +#ifndef _HEALTH_H_ +#define _HEALTH_H_ + +#include <linux/types.h> + +/** + * DOC: health.h + * + * This header file stores everything that is needed for broadly understood + * devlink health mechanism for ice driver. + */ + +struct ice_aqc_health_status_elem; +struct ice_pf; +struct ice_tx_ring; +struct ice_rq_event_info; + +enum ice_mdd_src { + ICE_MDD_SRC_TX_PQM, + ICE_MDD_SRC_TX_TCLAN, + ICE_MDD_SRC_TX_TDPU, + ICE_MDD_SRC_RX, +}; + +/** + * struct ice_health - stores ice devlink health reporters and accompanied data + * @fw: devlink health reporter for FW Health Status events + * @mdd: devlink health reporter for MDD detection event + * @port: devlink health reporter for Port Health Status events + * @tx_hang: devlink health reporter for tx_hang event + * @tx_hang_buf: pre-allocated place to put info for Tx hang reporter from + * non-sleeping context + * @tx_ring: ring that the hang occurred on + * @head: descriptor head + * @intr: interrupt register value + * @vsi_num: VSI owning the queue that the hang occurred on + * @fw_status: buffer for last received FW Status event + * @port_status: buffer for last received Port Status event + */ +struct ice_health { + struct devlink_health_reporter *fw; + struct devlink_health_reporter *mdd; + struct devlink_health_reporter *port; + struct devlink_health_reporter *tx_hang; + struct_group_tagged(ice_health_tx_hang_buf, tx_hang_buf, + struct ice_tx_ring *tx_ring; + u32 head; + u32 intr; + u16 vsi_num; + ); + struct ice_aqc_health_status_elem fw_status; + struct ice_aqc_health_status_elem port_status; +}; + +void ice_process_health_status_event(struct ice_pf *pf, + struct ice_rq_event_info *event); + +void ice_health_init(struct ice_pf *pf); +void ice_health_deinit(struct ice_pf *pf); +void ice_health_clear(struct ice_pf *pf); + +void ice_prep_tx_hang_report(struct ice_pf *pf, struct ice_tx_ring *tx_ring, + u16 vsi_num, u32 head, u32 intr); +void ice_report_mdd_event(struct ice_pf *pf, enum ice_mdd_src src, u8 pf_num, + u16 vf_num, u8 event, u16 queue); +void ice_report_tx_hang(struct ice_pf *pf); + +#endif /* _HEALTH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/devlink/port.c b/drivers/net/ethernet/intel/ice/devlink/port.c new file mode 100644 index 000000000000..767419a67fef --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/port.c @@ -0,0 +1,999 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Intel Corporation. */ + +#include <linux/vmalloc.h> + +#include "ice.h" +#include "devlink.h" +#include "port.h" +#include "ice_lib.h" +#include "ice_fltr.h" + +static int ice_active_port_option = -1; + +/** + * ice_devlink_port_opt_speed_str - convert speed to a string + * @speed: speed value + */ +static const char *ice_devlink_port_opt_speed_str(u8 speed) +{ + switch (speed & ICE_AQC_PORT_OPT_MAX_LANE_M) { + case ICE_AQC_PORT_OPT_MAX_LANE_100M: + return "0.1"; + case ICE_AQC_PORT_OPT_MAX_LANE_1G: + return "1"; + case ICE_AQC_PORT_OPT_MAX_LANE_2500M: + return "2.5"; + case ICE_AQC_PORT_OPT_MAX_LANE_5G: + return "5"; + case ICE_AQC_PORT_OPT_MAX_LANE_10G: + return "10"; + case ICE_AQC_PORT_OPT_MAX_LANE_25G: + return "25"; + case ICE_AQC_PORT_OPT_MAX_LANE_50G: + return "50"; + case ICE_AQC_PORT_OPT_MAX_LANE_100G: + return "100"; + } + + return "-"; +} + +#define ICE_PORT_OPT_DESC_LEN 50 +/** + * ice_devlink_port_options_print - Print available port split options + * @pf: the PF to print split port options + * + * Prints a table with available port split options and max port speeds + */ +static void ice_devlink_port_options_print(struct ice_pf *pf) +{ + u8 i, j, options_count, cnt, speed, pending_idx, active_idx; + struct ice_aqc_get_port_options_elem *options, *opt; + struct device *dev = ice_pf_to_dev(pf); + bool active_valid, pending_valid; + char desc[ICE_PORT_OPT_DESC_LEN]; + const char *str; + int status; + + options = kcalloc(ICE_AQC_PORT_OPT_MAX * ICE_MAX_PORT_PER_PCI_DEV, + sizeof(*options), GFP_KERNEL); + if (!options) + return; + + for (i = 0; i < ICE_MAX_PORT_PER_PCI_DEV; i++) { + opt = options + i * ICE_AQC_PORT_OPT_MAX; + options_count = ICE_AQC_PORT_OPT_MAX; + active_valid = 0; + + status = ice_aq_get_port_options(&pf->hw, opt, &options_count, + i, true, &active_idx, + &active_valid, &pending_idx, + &pending_valid); + if (status) { + dev_dbg(dev, "Couldn't read port option for port %d, err %d\n", + i, status); + goto err; + } + } + + dev_dbg(dev, "Available port split options and max port speeds (Gbps):\n"); + dev_dbg(dev, "Status Split Quad 0 Quad 1\n"); + dev_dbg(dev, " count L0 L1 L2 L3 L4 L5 L6 L7\n"); + + for (i = 0; i < options_count; i++) { + cnt = 0; + + if (i == ice_active_port_option) + str = "Active"; + else if ((i == pending_idx) && pending_valid) + str = "Pending"; + else + str = ""; + + cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, + "%-8s", str); + + cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, + "%-6u", options[i].pmd); + + for (j = 0; j < ICE_MAX_PORT_PER_PCI_DEV; ++j) { + speed = options[i + j * ICE_AQC_PORT_OPT_MAX].max_lane_speed; + str = ice_devlink_port_opt_speed_str(speed); + cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, + "%3s ", str); + } + + dev_dbg(dev, "%s\n", desc); + } + +err: + kfree(options); +} + +/** + * ice_devlink_aq_set_port_option - Send set port option admin queue command + * @pf: the PF to print split port options + * @option_idx: selected port option + * @extack: extended netdev ack structure + * + * Sends set port option admin queue command with selected port option and + * calls NVM write activate. + */ +static int +ice_devlink_aq_set_port_option(struct ice_pf *pf, u8 option_idx, + struct netlink_ext_ack *extack) +{ + struct device *dev = ice_pf_to_dev(pf); + int status; + + status = ice_aq_set_port_option(&pf->hw, 0, true, option_idx); + if (status) { + dev_dbg(dev, "ice_aq_set_port_option, err %d aq_err %d\n", + status, pf->hw.adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Port split request failed"); + return -EIO; + } + + status = ice_acquire_nvm(&pf->hw, ICE_RES_WRITE); + if (status) { + dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", + status, pf->hw.adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); + return -EIO; + } + + status = ice_nvm_write_activate(&pf->hw, ICE_AQC_NVM_ACTIV_REQ_EMPR, NULL); + if (status) { + dev_dbg(dev, "ice_nvm_write_activate failed, err %d aq_err %d\n", + status, pf->hw.adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Port split request failed to save data"); + ice_release_nvm(&pf->hw); + return -EIO; + } + + ice_release_nvm(&pf->hw); + + NL_SET_ERR_MSG_MOD(extack, "Reboot required to finish port split"); + return 0; +} + +/** + * ice_devlink_port_split - .port_split devlink handler + * @devlink: devlink instance structure + * @port: devlink port structure + * @count: number of ports to split to + * @extack: extended netdev ack structure + * + * Callback for the devlink .port_split operation. + * + * Unfortunately, the devlink expression of available options is limited + * to just a number, so search for an FW port option which supports + * the specified number. As there could be multiple FW port options with + * the same port split count, allow switching between them. When the same + * port split count request is issued again, switch to the next FW port + * option with the same port split count. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_split(struct devlink *devlink, struct devlink_port *port, + unsigned int count, struct netlink_ext_ack *extack) +{ + struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX]; + u8 i, j, active_idx, pending_idx, new_option; + struct ice_pf *pf = devlink_priv(devlink); + u8 option_count = ICE_AQC_PORT_OPT_MAX; + struct device *dev = ice_pf_to_dev(pf); + bool active_valid, pending_valid; + int status; + + status = ice_aq_get_port_options(&pf->hw, options, &option_count, + 0, true, &active_idx, &active_valid, + &pending_idx, &pending_valid); + if (status) { + dev_dbg(dev, "Couldn't read port split options, err = %d\n", + status); + NL_SET_ERR_MSG_MOD(extack, "Failed to get available port split options"); + return -EIO; + } + + new_option = ICE_AQC_PORT_OPT_MAX; + active_idx = pending_valid ? pending_idx : active_idx; + for (i = 1; i <= option_count; i++) { + /* In order to allow switching between FW port options with + * the same port split count, search for a new option starting + * from the active/pending option (with array wrap around). + */ + j = (active_idx + i) % option_count; + + if (count == options[j].pmd) { + new_option = j; + break; + } + } + + if (new_option == active_idx) { + dev_dbg(dev, "request to split: count: %u is already set and there are no other options\n", + count); + NL_SET_ERR_MSG_MOD(extack, "Requested split count is already set"); + ice_devlink_port_options_print(pf); + return -EINVAL; + } + + if (new_option == ICE_AQC_PORT_OPT_MAX) { + dev_dbg(dev, "request to split: count: %u not found\n", count); + NL_SET_ERR_MSG_MOD(extack, "Port split requested unsupported port config"); + ice_devlink_port_options_print(pf); + return -EINVAL; + } + + status = ice_devlink_aq_set_port_option(pf, new_option, extack); + if (status) + return status; + + ice_devlink_port_options_print(pf); + + return 0; +} + +/** + * ice_devlink_port_unsplit - .port_unsplit devlink handler + * @devlink: devlink instance structure + * @port: devlink port structure + * @extack: extended netdev ack structure + * + * Callback for the devlink .port_unsplit operation. + * Calls ice_devlink_port_split with split count set to 1. + * There could be no FW option available with split count 1. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_unsplit(struct devlink *devlink, struct devlink_port *port, + struct netlink_ext_ack *extack) +{ + return ice_devlink_port_split(devlink, port, 1, extack); +} + +/** + * ice_devlink_set_port_split_options - Set port split options + * @pf: the PF to set port split options + * @attrs: devlink attributes + * + * Sets devlink port split options based on available FW port options + */ +static void +ice_devlink_set_port_split_options(struct ice_pf *pf, + struct devlink_port_attrs *attrs) +{ + struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX]; + u8 i, active_idx, pending_idx, option_count = ICE_AQC_PORT_OPT_MAX; + bool active_valid, pending_valid; + int status; + + status = ice_aq_get_port_options(&pf->hw, options, &option_count, + 0, true, &active_idx, &active_valid, + &pending_idx, &pending_valid); + if (status) { + dev_dbg(ice_pf_to_dev(pf), "Couldn't read port split options, err = %d\n", + status); + return; + } + + /* find the biggest available port split count */ + for (i = 0; i < option_count; i++) + attrs->lanes = max_t(int, attrs->lanes, options[i].pmd); + + attrs->splittable = attrs->lanes ? 1 : 0; + ice_active_port_option = active_idx; +} + +static const struct devlink_port_ops ice_devlink_port_ops = { + .port_split = ice_devlink_port_split, + .port_unsplit = ice_devlink_port_unsplit, +}; + +/** + * ice_devlink_set_switch_id - Set unique switch id based on pci dsn + * @pf: the PF to create a devlink port for + * @ppid: struct with switch id information + */ +static void +ice_devlink_set_switch_id(struct ice_pf *pf, struct netdev_phys_item_id *ppid) +{ + struct pci_dev *pdev = pf->pdev; + u64 id; + + id = pci_get_dsn(pdev); + + ppid->id_len = sizeof(id); + put_unaligned_be64(id, &ppid->id); +} + +/** + * ice_devlink_create_pf_port - Create a devlink port for this PF + * @pf: the PF to create a devlink port for + * + * Create and register a devlink_port for this PF. + * This function has to be called under devl_lock. + * + * Return: zero on success or an error code on failure. + */ +int ice_devlink_create_pf_port(struct ice_pf *pf) +{ + struct devlink_port_attrs attrs = {}; + struct devlink_port *devlink_port; + struct devlink *devlink; + struct ice_vsi *vsi; + struct device *dev; + int err; + + devlink = priv_to_devlink(pf); + + dev = ice_pf_to_dev(pf); + + devlink_port = &pf->devlink_port; + + vsi = ice_get_main_vsi(pf); + if (!vsi) + return -EIO; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = pf->hw.pf_id; + + /* As FW supports only port split options for whole device, + * set port split options only for first PF. + */ + if (pf->hw.pf_id == 0) + ice_devlink_set_port_split_options(pf, &attrs); + + ice_devlink_set_switch_id(pf, &attrs.switch_id); + + devlink_port_attrs_set(devlink_port, &attrs); + + err = devl_port_register_with_ops(devlink, devlink_port, vsi->idx, + &ice_devlink_port_ops); + if (err) { + dev_err(dev, "Failed to create devlink port for PF %d, error %d\n", + pf->hw.pf_id, err); + return err; + } + + return 0; +} + +/** + * ice_devlink_destroy_pf_port - Destroy the devlink_port for this PF + * @pf: the PF to cleanup + * + * Unregisters the devlink_port structure associated with this PF. + * This function has to be called under devl_lock. + */ +void ice_devlink_destroy_pf_port(struct ice_pf *pf) +{ + devl_port_unregister(&pf->devlink_port); +} + +/** + * ice_devlink_port_get_vf_fn_mac - .port_fn_hw_addr_get devlink handler + * @port: devlink port structure + * @hw_addr: MAC address of the port + * @hw_addr_len: length of MAC address + * @extack: extended netdev ack structure + * + * Callback for the devlink .port_fn_hw_addr_get operation + * Return: zero on success or an error code on failure. + */ +static int ice_devlink_port_get_vf_fn_mac(struct devlink_port *port, + u8 *hw_addr, int *hw_addr_len, + struct netlink_ext_ack *extack) +{ + struct ice_vf *vf = container_of(port, struct ice_vf, devlink_port); + + ether_addr_copy(hw_addr, vf->dev_lan_addr); + *hw_addr_len = ETH_ALEN; + + return 0; +} + +/** + * ice_devlink_port_set_vf_fn_mac - .port_fn_hw_addr_set devlink handler + * @port: devlink port structure + * @hw_addr: MAC address of the port + * @hw_addr_len: length of MAC address + * @extack: extended netdev ack structure + * + * Callback for the devlink .port_fn_hw_addr_set operation + * Return: zero on success or an error code on failure. + */ +static int ice_devlink_port_set_vf_fn_mac(struct devlink_port *port, + const u8 *hw_addr, + int hw_addr_len, + struct netlink_ext_ack *extack) + +{ + struct devlink_port_attrs *attrs = &port->attrs; + struct devlink_port_pci_vf_attrs *pci_vf; + struct devlink *devlink = port->devlink; + struct ice_pf *pf; + u16 vf_id; + + pf = devlink_priv(devlink); + pci_vf = &attrs->pci_vf; + vf_id = pci_vf->vf; + + return __ice_set_vf_mac(pf, vf_id, hw_addr); +} + +static const struct devlink_port_ops ice_devlink_vf_port_ops = { + .port_fn_hw_addr_get = ice_devlink_port_get_vf_fn_mac, + .port_fn_hw_addr_set = ice_devlink_port_set_vf_fn_mac, +}; + +/** + * ice_devlink_create_vf_port - Create a devlink port for this VF + * @vf: the VF to create a port for + * + * Create and register a devlink_port for this VF. + * + * Return: zero on success or an error code on failure. + */ +int ice_devlink_create_vf_port(struct ice_vf *vf) +{ + struct devlink_port_attrs attrs = {}; + struct devlink_port *devlink_port; + struct devlink *devlink; + struct ice_vsi *vsi; + struct device *dev; + struct ice_pf *pf; + int err; + + pf = vf->pf; + dev = ice_pf_to_dev(pf); + devlink_port = &vf->devlink_port; + + vsi = ice_get_vf_vsi(vf); + if (!vsi) + return -EINVAL; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; + attrs.pci_vf.pf = pf->hw.pf_id; + attrs.pci_vf.vf = vf->vf_id; + + ice_devlink_set_switch_id(pf, &attrs.switch_id); + + devlink_port_attrs_set(devlink_port, &attrs); + devlink = priv_to_devlink(pf); + + err = devl_port_register_with_ops(devlink, devlink_port, vsi->idx, + &ice_devlink_vf_port_ops); + if (err) { + dev_err(dev, "Failed to create devlink port for VF %d, error %d\n", + vf->vf_id, err); + return err; + } + + return 0; +} + +/** + * ice_devlink_destroy_vf_port - Destroy the devlink_port for this VF + * @vf: the VF to cleanup + * + * Unregisters the devlink_port structure associated with this VF. + */ +void ice_devlink_destroy_vf_port(struct ice_vf *vf) +{ + devl_rate_leaf_destroy(&vf->devlink_port); + devl_port_unregister(&vf->devlink_port); +} + +/** + * ice_devlink_create_sf_dev_port - Register virtual port for a subfunction + * @sf_dev: the subfunction device to create a devlink port for + * + * Register virtual flavour devlink port for the subfunction auxiliary device + * created after activating a dynamically added devlink port. + * + * Return: zero on success or an error code on failure. + */ +int ice_devlink_create_sf_dev_port(struct ice_sf_dev *sf_dev) +{ + struct devlink_port_attrs attrs = {}; + struct ice_dynamic_port *dyn_port; + struct devlink_port *devlink_port; + struct devlink *devlink; + struct ice_vsi *vsi; + + dyn_port = sf_dev->dyn_port; + vsi = dyn_port->vsi; + + devlink_port = &sf_dev->priv->devlink_port; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL; + + devlink_port_attrs_set(devlink_port, &attrs); + devlink = priv_to_devlink(sf_dev->priv); + + return devl_port_register(devlink, devlink_port, vsi->idx); +} + +/** + * ice_devlink_destroy_sf_dev_port - Destroy virtual port for a subfunction + * @sf_dev: the subfunction device to create a devlink port for + * + * Unregisters the virtual port associated with this subfunction. + */ +void ice_devlink_destroy_sf_dev_port(struct ice_sf_dev *sf_dev) +{ + devl_port_unregister(&sf_dev->priv->devlink_port); +} + +/** + * ice_activate_dynamic_port - Activate a dynamic port + * @dyn_port: dynamic port instance to activate + * @extack: extack for reporting error messages + * + * Activate the dynamic port based on its flavour. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_activate_dynamic_port(struct ice_dynamic_port *dyn_port, + struct netlink_ext_ack *extack) +{ + int err; + + if (dyn_port->active) + return 0; + + err = ice_sf_eth_activate(dyn_port, extack); + if (err) + return err; + + dyn_port->active = true; + + return 0; +} + +/** + * ice_deactivate_dynamic_port - Deactivate a dynamic port + * @dyn_port: dynamic port instance to deactivate + * + * Undo activation of a dynamic port. + */ +static void ice_deactivate_dynamic_port(struct ice_dynamic_port *dyn_port) +{ + if (!dyn_port->active) + return; + + ice_sf_eth_deactivate(dyn_port); + dyn_port->active = false; +} + +/** + * ice_dealloc_dynamic_port - Deallocate and remove a dynamic port + * @dyn_port: dynamic port instance to deallocate + * + * Free resources associated with a dynamically added devlink port. Will + * deactivate the port if its currently active. + */ +static void ice_dealloc_dynamic_port(struct ice_dynamic_port *dyn_port) +{ + struct devlink_port *devlink_port = &dyn_port->devlink_port; + struct ice_pf *pf = dyn_port->pf; + + ice_deactivate_dynamic_port(dyn_port); + + xa_erase(&pf->sf_nums, devlink_port->attrs.pci_sf.sf); + ice_eswitch_detach_sf(pf, dyn_port); + ice_vsi_free(dyn_port->vsi); + xa_erase(&pf->dyn_ports, dyn_port->vsi->idx); + kfree(dyn_port); +} + +/** + * ice_dealloc_all_dynamic_ports - Deallocate all dynamic devlink ports + * @pf: pointer to the pf structure + */ +void ice_dealloc_all_dynamic_ports(struct ice_pf *pf) +{ + struct ice_dynamic_port *dyn_port; + unsigned long index; + + xa_for_each(&pf->dyn_ports, index, dyn_port) + ice_dealloc_dynamic_port(dyn_port); +} + +/** + * ice_devlink_port_new_check_attr - Check that new port attributes are valid + * @pf: pointer to the PF structure + * @new_attr: the attributes for the new port + * @extack: extack for reporting error messages + * + * Check that the attributes for the new port are valid before continuing to + * allocate the devlink port. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_new_check_attr(struct ice_pf *pf, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack) +{ + if (new_attr->flavour != DEVLINK_PORT_FLAVOUR_PCI_SF) { + NL_SET_ERR_MSG_MOD(extack, "Flavour other than pcisf is not supported"); + return -EOPNOTSUPP; + } + + if (new_attr->controller_valid) { + NL_SET_ERR_MSG_MOD(extack, "Setting controller is not supported"); + return -EOPNOTSUPP; + } + + if (new_attr->port_index_valid) { + NL_SET_ERR_MSG_MOD(extack, "Driver does not support user defined port index assignment"); + return -EOPNOTSUPP; + } + + if (new_attr->pfnum != pf->hw.pf_id) { + NL_SET_ERR_MSG_MOD(extack, "Incorrect pfnum supplied"); + return -EINVAL; + } + + if (!pci_msix_can_alloc_dyn(pf->pdev)) { + NL_SET_ERR_MSG_MOD(extack, "Dynamic MSIX-X interrupt allocation is not supported"); + return -EOPNOTSUPP; + } + + return 0; +} + +/** + * ice_devlink_port_del - devlink handler for port delete + * @devlink: pointer to devlink + * @port: devlink port to be deleted + * @extack: pointer to extack + * + * Deletes devlink port and deallocates all resources associated with + * created subfunction. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_del(struct devlink *devlink, struct devlink_port *port, + struct netlink_ext_ack *extack) +{ + struct ice_dynamic_port *dyn_port; + + dyn_port = ice_devlink_port_to_dyn(port); + ice_dealloc_dynamic_port(dyn_port); + + return 0; +} + +/** + * ice_devlink_port_fn_hw_addr_set - devlink handler for mac address set + * @port: pointer to devlink port + * @hw_addr: hw address to set + * @hw_addr_len: hw address length + * @extack: extack for reporting error messages + * + * Sets mac address for the port, verifies arguments and copies address + * to the subfunction structure. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_fn_hw_addr_set(struct devlink_port *port, const u8 *hw_addr, + int hw_addr_len, + struct netlink_ext_ack *extack) +{ + struct ice_dynamic_port *dyn_port; + + dyn_port = ice_devlink_port_to_dyn(port); + + if (dyn_port->attached) { + NL_SET_ERR_MSG_MOD(extack, + "Ethernet address can be change only in detached state"); + return -EBUSY; + } + + if (hw_addr_len != ETH_ALEN || !is_valid_ether_addr(hw_addr)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid ethernet address"); + return -EADDRNOTAVAIL; + } + + ether_addr_copy(dyn_port->hw_addr, hw_addr); + + return 0; +} + +/** + * ice_devlink_port_fn_hw_addr_get - devlink handler for mac address get + * @port: pointer to devlink port + * @hw_addr: hw address to set + * @hw_addr_len: hw address length + * @extack: extack for reporting error messages + * + * Returns mac address for the port. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_fn_hw_addr_get(struct devlink_port *port, u8 *hw_addr, + int *hw_addr_len, + struct netlink_ext_ack *extack) +{ + struct ice_dynamic_port *dyn_port; + + dyn_port = ice_devlink_port_to_dyn(port); + + ether_addr_copy(hw_addr, dyn_port->hw_addr); + *hw_addr_len = ETH_ALEN; + + return 0; +} + +/** + * ice_devlink_port_fn_state_set - devlink handler for port state set + * @port: pointer to devlink port + * @state: state to set + * @extack: extack for reporting error messages + * + * Activates or deactivates the port. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_fn_state_set(struct devlink_port *port, + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack) +{ + struct ice_dynamic_port *dyn_port; + + dyn_port = ice_devlink_port_to_dyn(port); + + switch (state) { + case DEVLINK_PORT_FN_STATE_ACTIVE: + return ice_activate_dynamic_port(dyn_port, extack); + + case DEVLINK_PORT_FN_STATE_INACTIVE: + ice_deactivate_dynamic_port(dyn_port); + break; + } + + return 0; +} + +/** + * ice_devlink_port_fn_state_get - devlink handler for port state get + * @port: pointer to devlink port + * @state: admin configured state of the port + * @opstate: current port operational state + * @extack: extack for reporting error messages + * + * Gets port state. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_fn_state_get(struct devlink_port *port, + enum devlink_port_fn_state *state, + enum devlink_port_fn_opstate *opstate, + struct netlink_ext_ack *extack) +{ + struct ice_dynamic_port *dyn_port; + + dyn_port = ice_devlink_port_to_dyn(port); + + if (dyn_port->active) + *state = DEVLINK_PORT_FN_STATE_ACTIVE; + else + *state = DEVLINK_PORT_FN_STATE_INACTIVE; + + if (dyn_port->attached) + *opstate = DEVLINK_PORT_FN_OPSTATE_ATTACHED; + else + *opstate = DEVLINK_PORT_FN_OPSTATE_DETACHED; + + return 0; +} + +static const struct devlink_port_ops ice_devlink_port_sf_ops = { + .port_del = ice_devlink_port_del, + .port_fn_hw_addr_get = ice_devlink_port_fn_hw_addr_get, + .port_fn_hw_addr_set = ice_devlink_port_fn_hw_addr_set, + .port_fn_state_get = ice_devlink_port_fn_state_get, + .port_fn_state_set = ice_devlink_port_fn_state_set, +}; + +/** + * ice_reserve_sf_num - Reserve a subfunction number for this port + * @pf: pointer to the pf structure + * @new_attr: devlink port attributes requested + * @extack: extack for reporting error messages + * @sfnum: on success, the sf number reserved + * + * Reserve a subfunction number for this port. Only called for + * DEVLINK_PORT_FLAVOUR_PCI_SF ports. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_reserve_sf_num(struct ice_pf *pf, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack, u32 *sfnum) +{ + int err; + + /* If user didn't request an explicit number, pick one */ + if (!new_attr->sfnum_valid) + return xa_alloc(&pf->sf_nums, sfnum, NULL, xa_limit_32b, + GFP_KERNEL); + + /* Otherwise, check and use the number provided */ + err = xa_insert(&pf->sf_nums, new_attr->sfnum, NULL, GFP_KERNEL); + if (err) { + if (err == -EBUSY) + NL_SET_ERR_MSG_MOD(extack, "Subfunction with given sfnum already exists"); + return err; + } + + *sfnum = new_attr->sfnum; + + return 0; +} + +/** + * ice_devlink_create_sf_port - Register PCI subfunction devlink port + * @dyn_port: the dynamic port instance structure for this subfunction + * + * Register PCI subfunction flavour devlink port for a dynamically added + * subfunction port. + * + * Return: zero on success or an error code on failure. + */ +int ice_devlink_create_sf_port(struct ice_dynamic_port *dyn_port) +{ + struct devlink_port_attrs attrs = {}; + struct devlink_port *devlink_port; + struct devlink *devlink; + struct ice_vsi *vsi; + struct ice_pf *pf; + + vsi = dyn_port->vsi; + pf = dyn_port->pf; + + devlink_port = &dyn_port->devlink_port; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_SF; + attrs.pci_sf.pf = pf->hw.pf_id; + attrs.pci_sf.sf = dyn_port->sfnum; + + devlink_port_attrs_set(devlink_port, &attrs); + devlink = priv_to_devlink(pf); + + return devl_port_register_with_ops(devlink, devlink_port, vsi->idx, + &ice_devlink_port_sf_ops); +} + +/** + * ice_devlink_destroy_sf_port - Destroy the devlink_port for this SF + * @dyn_port: the dynamic port instance structure for this subfunction + * + * Unregisters the devlink_port structure associated with this SF. + */ +void ice_devlink_destroy_sf_port(struct ice_dynamic_port *dyn_port) +{ + devl_rate_leaf_destroy(&dyn_port->devlink_port); + devl_port_unregister(&dyn_port->devlink_port); +} + +/** + * ice_alloc_dynamic_port - Allocate new dynamic port + * @pf: pointer to the pf structure + * @new_attr: devlink port attributes requested + * @extack: extack for reporting error messages + * @devlink_port: index of newly created devlink port + * + * Allocate a new dynamic port instance and prepare it for configuration + * with devlink. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_alloc_dynamic_port(struct ice_pf *pf, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack, + struct devlink_port **devlink_port) +{ + struct ice_dynamic_port *dyn_port; + struct ice_vsi *vsi; + u32 sfnum; + int err; + + err = ice_reserve_sf_num(pf, new_attr, extack, &sfnum); + if (err) + return err; + + dyn_port = kzalloc(sizeof(*dyn_port), GFP_KERNEL); + if (!dyn_port) { + err = -ENOMEM; + goto unroll_reserve_sf_num; + } + + vsi = ice_vsi_alloc(pf); + if (!vsi) { + NL_SET_ERR_MSG_MOD(extack, "Unable to allocate VSI"); + err = -ENOMEM; + goto unroll_dyn_port_alloc; + } + + dyn_port->vsi = vsi; + dyn_port->pf = pf; + dyn_port->sfnum = sfnum; + eth_random_addr(dyn_port->hw_addr); + + err = xa_insert(&pf->dyn_ports, vsi->idx, dyn_port, GFP_KERNEL); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Port index reservation failed"); + goto unroll_vsi_alloc; + } + + err = ice_eswitch_attach_sf(pf, dyn_port); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to attach SF to eswitch"); + goto unroll_xa_insert; + } + + *devlink_port = &dyn_port->devlink_port; + + return 0; + +unroll_xa_insert: + xa_erase(&pf->dyn_ports, vsi->idx); +unroll_vsi_alloc: + ice_vsi_free(vsi); +unroll_dyn_port_alloc: + kfree(dyn_port); +unroll_reserve_sf_num: + xa_erase(&pf->sf_nums, sfnum); + + return err; +} + +/** + * ice_devlink_port_new - devlink handler for the new port + * @devlink: pointer to devlink + * @new_attr: pointer to the port new attributes + * @extack: extack for reporting error messages + * @devlink_port: pointer to a new port + * + * Creates new devlink port, checks new port attributes and reject + * any unsupported parameters, allocates new subfunction for that port. + * + * Return: zero on success or an error code on failure. + */ +int +ice_devlink_port_new(struct devlink *devlink, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack, + struct devlink_port **devlink_port) +{ + struct ice_pf *pf = devlink_priv(devlink); + int err; + + err = ice_devlink_port_new_check_attr(pf, new_attr, extack); + if (err) + return err; + + if (!ice_is_eswitch_mode_switchdev(pf)) { + NL_SET_ERR_MSG_MOD(extack, + "SF ports are only supported in eswitch switchdev mode"); + return -EOPNOTSUPP; + } + + return ice_alloc_dynamic_port(pf, new_attr, extack, devlink_port); +} diff --git a/drivers/net/ethernet/intel/ice/devlink/port.h b/drivers/net/ethernet/intel/ice/devlink/port.h new file mode 100644 index 000000000000..d60efc340945 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/port.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2024, Intel Corporation. */ + +#ifndef _DEVLINK_PORT_H_ +#define _DEVLINK_PORT_H_ + +#include "../ice.h" +#include "../ice_sf_eth.h" + +/** + * struct ice_dynamic_port - Track dynamically added devlink port instance + * @hw_addr: the HW address for this port + * @active: true if the port has been activated + * @attached: true it the prot is attached + * @devlink_port: the associated devlink port structure + * @pf: pointer to the PF private structure + * @vsi: the VSI associated with this port + * @repr_id: the representor ID + * @sfnum: the subfunction ID + * @sf_dev: pointer to the subfunction device + * + * An instance of a dynamically added devlink port. Each port flavour + */ +struct ice_dynamic_port { + u8 hw_addr[ETH_ALEN]; + u8 active: 1; + u8 attached: 1; + struct devlink_port devlink_port; + struct ice_pf *pf; + struct ice_vsi *vsi; + unsigned long repr_id; + u32 sfnum; + /* Flavour-specific implementation data */ + union { + struct ice_sf_dev *sf_dev; + }; +}; + +void ice_dealloc_all_dynamic_ports(struct ice_pf *pf); + +int ice_devlink_create_pf_port(struct ice_pf *pf); +void ice_devlink_destroy_pf_port(struct ice_pf *pf); +int ice_devlink_create_vf_port(struct ice_vf *vf); +void ice_devlink_destroy_vf_port(struct ice_vf *vf); +int ice_devlink_create_sf_port(struct ice_dynamic_port *dyn_port); +void ice_devlink_destroy_sf_port(struct ice_dynamic_port *dyn_port); +int ice_devlink_create_sf_dev_port(struct ice_sf_dev *sf_dev); +void ice_devlink_destroy_sf_dev_port(struct ice_sf_dev *sf_dev); + +#define ice_devlink_port_to_dyn(port) \ + container_of(port, struct ice_dynamic_port, devlink_port) + +int +ice_devlink_port_new(struct devlink *devlink, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack, + struct devlink_port **devlink_port); +#endif /* _DEVLINK_PORT_H_ */ |