summaryrefslogtreecommitdiff
path: root/drivers/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-10-20 11:43:11 +0100
committerDavid S. Miller <davem@davemloft.net>2021-10-20 11:43:11 +0100
commit37ba803dbd3f4fa658cbd1a46c43009f94173bf2 (patch)
tree63221bbab4907e1006d1406965f638a2b574167d /drivers/net
parent623acf876398917b9798e2c59cf493249e0d75a4 (diff)
parent8702ed0b0de146c980883896b4cbb40ce860142e (diff)
Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue
Tony Nguyen says: ==================== 100GbE Intel Wired LAN Driver Updates 2021-10-19 This series contains updates to ice driver only. Brett implements support for ndo_set_vf_rate allowing for min_tx_rate and max_tx_rate to be set for a VF. Jesse updates DIM moderation to improve latency and resolves problems with reported rate limit and extra software generated interrupts. Wojciech moves a check for trusted VFs to the correct function, disables lb_en for switchdev offloads, and refactors ethtool ops due to differences in support for PF and port representor support. Cai Huoqing utilizes the helper function devm_add_action_or_reset(). Gustavo A. R. Silva replaces uses of allocation to devm_kcalloc() as applicable. Dan Carpenter propagates an error instead of returning success. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r--drivers/net/ethernet/intel/ice/ice_devlink.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c116
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_fltr.c127
-rw-r--r--drivers/net/ethernet/intel/ice/ice_fltr.h4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_hw_autogen.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c203
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.h5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c120
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sched.c130
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sched.h6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.h11
-rw-r--r--drivers/net/ethernet/intel/ice/ice_tc_lib.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c114
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c173
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h15
17 files changed, 764 insertions, 286 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
index 55353bf4cbef..e3d9f3f3a631 100644
--- a/drivers/net/ethernet/intel/ice/ice_devlink.c
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
@@ -452,10 +452,8 @@ struct ice_pf *ice_allocate_pf(struct device *dev)
return NULL;
/* Add an action to teardown the devlink when unwinding the driver */
- if (devm_add_action(dev, ice_devlink_free, devlink)) {
- devlink_free(devlink);
+ if (devm_add_action_or_reset(dev, ice_devlink_free, devlink))
return NULL;
- }
return devlink_priv(devlink);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 6e0af72c2020..8b3eef6632e9 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -192,7 +192,6 @@ __ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo,
strscpy(drvinfo->bus_info, pci_name(pf->pdev),
sizeof(drvinfo->bus_info));
- drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE;
}
static void
@@ -201,18 +200,8 @@ ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
struct ice_netdev_priv *np = netdev_priv(netdev);
__ice_get_drvinfo(netdev, drvinfo, np->vsi);
-}
-
-static void
-ice_repr_get_drvinfo(struct net_device *netdev,
- struct ethtool_drvinfo *drvinfo)
-{
- struct ice_repr *repr = ice_netdev_to_repr(netdev);
-
- if (ice_check_vf_ready_for_cfg(repr->vf))
- return;
- __ice_get_drvinfo(netdev, drvinfo, repr->src_vsi);
+ drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE;
}
static int ice_get_regs_len(struct net_device __always_unused *netdev)
@@ -886,10 +875,10 @@ skip_ol_tests:
netdev_info(netdev, "testing finished\n");
}
-static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+static void
+__ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data,
+ struct ice_vsi *vsi)
{
- struct ice_netdev_priv *np = netdev_priv(netdev);
- struct ice_vsi *vsi = ice_get_netdev_priv_vsi(np);
unsigned int i;
u8 *p = data;
@@ -940,6 +929,13 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
}
}
+static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+
+ __ice_get_strings(netdev, stringset, data, np->vsi);
+}
+
static int
ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
{
@@ -1331,9 +1327,6 @@ static int ice_get_sset_count(struct net_device *netdev, int sset)
* order of strings will suffer from race conditions and are
* not safe.
*/
- if (ice_is_port_repr_netdev(netdev))
- return ICE_VSI_STATS_LEN;
-
return ICE_ALL_STATS_LEN(netdev);
case ETH_SS_TEST:
return ICE_TEST_LEN;
@@ -1345,11 +1338,10 @@ static int ice_get_sset_count(struct net_device *netdev, int sset)
}
static void
-ice_get_ethtool_stats(struct net_device *netdev,
- struct ethtool_stats __always_unused *stats, u64 *data)
+__ice_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats __always_unused *stats, u64 *data,
+ struct ice_vsi *vsi)
{
- struct ice_netdev_priv *np = netdev_priv(netdev);
- struct ice_vsi *vsi = ice_get_netdev_priv_vsi(np);
struct ice_pf *pf = vsi->back;
struct ice_tx_ring *tx_ring;
struct ice_rx_ring *rx_ring;
@@ -1416,6 +1408,15 @@ ice_get_ethtool_stats(struct net_device *netdev,
}
}
+static void
+ice_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats __always_unused *stats, u64 *data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+
+ __ice_get_ethtool_stats(netdev, stats, data, np->vsi);
+}
+
#define ICE_PHY_TYPE_LOW_MASK_MIN_1G (ICE_PHY_TYPE_LOW_100BASE_TX | \
ICE_PHY_TYPE_LOW_100M_SGMII)
@@ -3640,6 +3641,9 @@ ice_set_rc_coalesce(struct ethtool_coalesce *ec,
switch (rc->type) {
case ICE_RX_CONTAINER:
+ {
+ struct ice_q_vector *q_vector = rc->rx_ring->q_vector;
+
if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL ||
(ec->rx_coalesce_usecs_high &&
ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) {
@@ -3648,22 +3652,20 @@ ice_set_rc_coalesce(struct ethtool_coalesce *ec,
ICE_MAX_INTRL);
return -EINVAL;
}
- if (ec->rx_coalesce_usecs_high != rc->rx_ring->q_vector->intrl &&
+ if (ec->rx_coalesce_usecs_high != q_vector->intrl &&
(ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce)) {
netdev_info(vsi->netdev, "Invalid value, %s-usecs-high cannot be changed if adaptive-tx or adaptive-rx is enabled\n",
c_type_str);
return -EINVAL;
}
- if (ec->rx_coalesce_usecs_high != rc->rx_ring->q_vector->intrl) {
- rc->rx_ring->q_vector->intrl = ec->rx_coalesce_usecs_high;
- ice_write_intrl(rc->rx_ring->q_vector,
- ec->rx_coalesce_usecs_high);
- }
+ if (ec->rx_coalesce_usecs_high != q_vector->intrl)
+ q_vector->intrl = ec->rx_coalesce_usecs_high;
use_adaptive_coalesce = ec->use_adaptive_rx_coalesce;
coalesce_usecs = ec->rx_coalesce_usecs;
break;
+ }
case ICE_TX_CONTAINER:
use_adaptive_coalesce = ec->use_adaptive_tx_coalesce;
coalesce_usecs = ec->tx_coalesce_usecs;
@@ -3808,6 +3810,8 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
if (ice_set_q_coalesce(vsi, ec, v_idx))
return -EINVAL;
+
+ ice_set_q_vector_intrl(vsi->q_vectors[v_idx]);
}
goto set_complete;
}
@@ -3815,6 +3819,8 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
if (ice_set_q_coalesce(vsi, ec, q_num))
return -EINVAL;
+ ice_set_q_vector_intrl(vsi->q_vectors[q_num]);
+
set_complete:
return 0;
}
@@ -3834,6 +3840,54 @@ ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num,
return __ice_set_coalesce(netdev, ec, q_num);
}
+static void
+ice_repr_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct ice_repr *repr = ice_netdev_to_repr(netdev);
+
+ if (ice_check_vf_ready_for_cfg(repr->vf))
+ return;
+
+ __ice_get_drvinfo(netdev, drvinfo, repr->src_vsi);
+}
+
+static void
+ice_repr_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ struct ice_repr *repr = ice_netdev_to_repr(netdev);
+
+ /* for port representors only ETH_SS_STATS is supported */
+ if (ice_check_vf_ready_for_cfg(repr->vf) ||
+ stringset != ETH_SS_STATS)
+ return;
+
+ __ice_get_strings(netdev, stringset, data, repr->src_vsi);
+}
+
+static void
+ice_repr_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats __always_unused *stats,
+ u64 *data)
+{
+ struct ice_repr *repr = ice_netdev_to_repr(netdev);
+
+ if (ice_check_vf_ready_for_cfg(repr->vf))
+ return;
+
+ __ice_get_ethtool_stats(netdev, stats, data, repr->src_vsi);
+}
+
+static int ice_repr_get_sset_count(struct net_device *netdev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return ICE_VSI_STATS_LEN;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
#define ICE_I2C_EEPROM_DEV_ADDR 0xA0
#define ICE_I2C_EEPROM_DEV_ADDR2 0xA2
#define ICE_MODULE_TYPE_SFP 0x03
@@ -4088,9 +4142,9 @@ void ice_set_ethtool_safe_mode_ops(struct net_device *netdev)
static const struct ethtool_ops ice_ethtool_repr_ops = {
.get_drvinfo = ice_repr_get_drvinfo,
.get_link = ethtool_op_get_link,
- .get_strings = ice_get_strings,
- .get_ethtool_stats = ice_get_ethtool_stats,
- .get_sset_count = ice_get_sset_count,
+ .get_strings = ice_repr_get_strings,
+ .get_ethtool_stats = ice_repr_get_ethtool_stats,
+ .get_sset_count = ice_repr_get_sset_count,
};
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
index 16de603b280c..38960bcc384c 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -706,7 +706,7 @@ ice_create_init_fdir_rule(struct ice_pf *pf, enum ice_fltr_ptype flow)
if (!seg)
return -ENOMEM;
- tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX,
+ tun_seg = devm_kcalloc(dev, sizeof(*seg), ICE_FD_HW_SEG_MAX,
GFP_KERNEL);
if (!tun_seg) {
devm_kfree(dev, seg);
@@ -1068,7 +1068,7 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp,
if (!seg)
return -ENOMEM;
- tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX,
+ tun_seg = devm_kcalloc(dev, sizeof(*seg), ICE_FD_HW_SEG_MAX,
GFP_KERNEL);
if (!tun_seg) {
devm_kfree(dev, seg);
diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c
index f8c59df4ff9c..c2e78eaf4ccb 100644
--- a/drivers/net/ethernet/intel/ice/ice_fltr.c
+++ b/drivers/net/ethernet/intel/ice/ice_fltr.c
@@ -454,133 +454,6 @@ static u32 ice_fltr_build_action(u16 vsi_id)
}
/**
- * ice_fltr_find_adv_entry - find advanced rule
- * @rules: list of rules
- * @rule_id: id of wanted rule
- */
-static struct ice_adv_fltr_mgmt_list_entry *
-ice_fltr_find_adv_entry(struct list_head *rules, u16 rule_id)
-{
- struct ice_adv_fltr_mgmt_list_entry *entry;
-
- list_for_each_entry(entry, rules, list_entry) {
- if (entry->rule_info.fltr_rule_id == rule_id)
- return entry;
- }
-
- return NULL;
-}
-
-/**
- * ice_fltr_update_adv_rule_flags - update flags on advanced rule
- * @vsi: pointer to VSI
- * @recipe_id: id of recipe
- * @entry: advanced rule entry
- * @new_flags: flags to update
- */
-static enum ice_status
-ice_fltr_update_adv_rule_flags(struct ice_vsi *vsi, u16 recipe_id,
- struct ice_adv_fltr_mgmt_list_entry *entry,
- u32 new_flags)
-{
- struct ice_adv_rule_info *info = &entry->rule_info;
- struct ice_sw_act_ctrl *act = &info->sw_act;
- u32 action;
-
- if (act->fltr_act != ICE_FWD_TO_VSI)
- return ICE_ERR_NOT_SUPPORTED;
-
- action = ice_fltr_build_action(act->fwd_id.hw_vsi_id);
-
- return ice_fltr_update_rule_flags(&vsi->back->hw, info->fltr_rule_id,
- recipe_id, action, info->sw_act.flag,
- act->src, new_flags);
-}
-
-/**
- * ice_fltr_find_regular_entry - find regular rule
- * @rules: list of rules
- * @rule_id: id of wanted rule
- */
-static struct ice_fltr_mgmt_list_entry *
-ice_fltr_find_regular_entry(struct list_head *rules, u16 rule_id)
-{
- struct ice_fltr_mgmt_list_entry *entry;
-
- list_for_each_entry(entry, rules, list_entry) {
- if (entry->fltr_info.fltr_rule_id == rule_id)
- return entry;
- }
-
- return NULL;
-}
-
-/**
- * ice_fltr_update_regular_rule - update flags on regular rule
- * @vsi: pointer to VSI
- * @recipe_id: id of recipe
- * @entry: regular rule entry
- * @new_flags: flags to update
- */
-static enum ice_status
-ice_fltr_update_regular_rule(struct ice_vsi *vsi, u16 recipe_id,
- struct ice_fltr_mgmt_list_entry *entry,
- u32 new_flags)
-{
- struct ice_fltr_info *info = &entry->fltr_info;
- u32 action;
-
- if (info->fltr_act != ICE_FWD_TO_VSI)
- return ICE_ERR_NOT_SUPPORTED;
-
- action = ice_fltr_build_action(info->fwd_id.hw_vsi_id);
-
- return ice_fltr_update_rule_flags(&vsi->back->hw, info->fltr_rule_id,
- recipe_id, action, info->flag,
- info->src, new_flags);
-}
-
-/**
- * ice_fltr_update_flags - update flags on rule
- * @vsi: pointer to VSI
- * @rule_id: id of rule
- * @recipe_id: id of recipe
- * @new_flags: flags to update
- *
- * Function updates flags on regular and advance rule.
- *
- * Flags should be a combination of ICE_SINGLE_ACT_LB_ENABLE and
- * ICE_SINGLE_ACT_LAN_ENABLE.
- */
-enum ice_status
-ice_fltr_update_flags(struct ice_vsi *vsi, u16 rule_id, u16 recipe_id,
- u32 new_flags)
-{
- struct ice_adv_fltr_mgmt_list_entry *adv_entry;
- struct ice_fltr_mgmt_list_entry *regular_entry;
- struct ice_hw *hw = &vsi->back->hw;
- struct ice_sw_recipe *recp_list;
- struct list_head *fltr_rules;
-
- recp_list = &hw->switch_info->recp_list[recipe_id];
- if (!recp_list)
- return ICE_ERR_DOES_NOT_EXIST;
-
- fltr_rules = &recp_list->filt_rules;
- regular_entry = ice_fltr_find_regular_entry(fltr_rules, rule_id);
- if (regular_entry)
- return ice_fltr_update_regular_rule(vsi, recipe_id,
- regular_entry, new_flags);
-
- adv_entry = ice_fltr_find_adv_entry(fltr_rules, rule_id);
- if (adv_entry)
- return ice_fltr_update_adv_rule_flags(vsi, recipe_id,
- adv_entry, new_flags);
-
- return ICE_ERR_DOES_NOT_EXIST;
-}
-
-/**
* ice_fltr_update_flags_dflt_rule - update flags on default rule
* @vsi: pointer to VSI
* @rule_id: id of rule
diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.h b/drivers/net/ethernet/intel/ice/ice_fltr.h
index 949e38ce016c..8eec4febead1 100644
--- a/drivers/net/ethernet/intel/ice/ice_fltr.h
+++ b/drivers/net/ethernet/intel/ice/ice_fltr.h
@@ -36,10 +36,6 @@ enum ice_status
ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
enum ice_sw_fwd_act_type action);
void ice_fltr_remove_all(struct ice_vsi *vsi);
-
-enum ice_status
-ice_fltr_update_flags(struct ice_vsi *vsi, u16 rule_id, u16 recipe_id,
- u32 new_flags);
enum ice_status
ice_fltr_update_flags_dflt_rule(struct ice_vsi *vsi, u16 rule_id, u8 direction,
u32 new_flags);
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 76021d977b60..a49082485642 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -182,6 +182,7 @@
#define GLINT_DYN_CTL_INTERVAL_S 5
#define GLINT_DYN_CTL_INTERVAL_M ICE_M(0xFFF, 5)
#define GLINT_DYN_CTL_SW_ITR_INDX_ENA_M BIT(24)
+#define GLINT_DYN_CTL_SW_ITR_INDX_S 25
#define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, 25)
#define GLINT_DYN_CTL_WB_ON_ITR_M BIT(30)
#define GLINT_DYN_CTL_INTENA_MSK_M BIT(31)
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index f981e77f72ad..231f8bea2519 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1942,6 +1942,31 @@ void ice_write_itr(struct ice_ring_container *rc, u16 itr)
}
/**
+ * ice_set_q_vector_intrl - set up interrupt rate limiting
+ * @q_vector: the vector to be configured
+ *
+ * Interrupt rate limiting is local to the vector, not per-queue so we must
+ * detect if either ring container has dynamic moderation enabled to decide
+ * what to set the interrupt rate limit to via INTRL settings. In the case that
+ * dynamic moderation is disabled on both, write the value with the cached
+ * setting to make sure INTRL register matches the user visible value.
+ */
+void ice_set_q_vector_intrl(struct ice_q_vector *q_vector)
+{
+ if (ITR_IS_DYNAMIC(&q_vector->tx) || ITR_IS_DYNAMIC(&q_vector->rx)) {
+ /* in the case of dynamic enabled, cap each vector to no more
+ * than (4 us) 250,000 ints/sec, which allows low latency
+ * but still less than 500,000 interrupts per second, which
+ * reduces CPU a bit in the case of the lowest latency
+ * setting. The 4 here is a value in microseconds.
+ */
+ ice_write_intrl(q_vector, 4);
+ } else {
+ ice_write_intrl(q_vector, q_vector->intrl);
+ }
+}
+
+/**
* ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW
* @vsi: the VSI being configured
*
@@ -3096,7 +3121,7 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
}
vsi->q_vectors[i]->intrl = coalesce[i].intrl;
- ice_write_intrl(vsi->q_vectors[i], coalesce[i].intrl);
+ ice_set_q_vector_intrl(vsi->q_vectors[i]);
}
/* the number of queue vectors increased so write whatever is in
@@ -3114,7 +3139,7 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
ice_write_itr(rc, rc->itr_setting);
vsi->q_vectors[i]->intrl = coalesce[0].intrl;
- ice_write_intrl(vsi->q_vectors[i], coalesce[0].intrl);
+ ice_set_q_vector_intrl(vsi->q_vectors[i]);
}
}
@@ -3601,6 +3626,180 @@ int ice_clear_dflt_vsi(struct ice_sw *sw)
}
/**
+ * ice_get_link_speed_mbps - get link speed in Mbps
+ * @vsi: the VSI whose link speed is being queried
+ *
+ * Return current VSI link speed and 0 if the speed is unknown.
+ */
+int ice_get_link_speed_mbps(struct ice_vsi *vsi)
+{
+ switch (vsi->port_info->phy.link_info.link_speed) {
+ case ICE_AQ_LINK_SPEED_100GB:
+ return SPEED_100000;
+ case ICE_AQ_LINK_SPEED_50GB:
+ return SPEED_50000;
+ case ICE_AQ_LINK_SPEED_40GB:
+ return SPEED_40000;
+ case ICE_AQ_LINK_SPEED_25GB:
+ return SPEED_25000;
+ case ICE_AQ_LINK_SPEED_20GB:
+ return SPEED_20000;
+ case ICE_AQ_LINK_SPEED_10GB:
+ return SPEED_10000;
+ case ICE_AQ_LINK_SPEED_5GB:
+ return SPEED_5000;
+ case ICE_AQ_LINK_SPEED_2500MB:
+ return SPEED_2500;
+ case ICE_AQ_LINK_SPEED_1000MB:
+ return SPEED_1000;
+ case ICE_AQ_LINK_SPEED_100MB:
+ return SPEED_100;
+ case ICE_AQ_LINK_SPEED_10MB:
+ return SPEED_10;
+ case ICE_AQ_LINK_SPEED_UNKNOWN:
+ default:
+ return 0;
+ }
+}
+
+/**
+ * ice_get_link_speed_kbps - get link speed in Kbps
+ * @vsi: the VSI whose link speed is being queried
+ *
+ * Return current VSI link speed and 0 if the speed is unknown.
+ */
+static int ice_get_link_speed_kbps(struct ice_vsi *vsi)
+{
+ int speed_mbps;
+
+ speed_mbps = ice_get_link_speed_mbps(vsi);
+
+ return speed_mbps * 1000;
+}
+
+/**
+ * ice_set_min_bw_limit - setup minimum BW limit for Tx based on min_tx_rate
+ * @vsi: VSI to be configured
+ * @min_tx_rate: min Tx rate in Kbps to be configured as BW limit
+ *
+ * If the min_tx_rate is specified as 0 that means to clear the minimum BW limit
+ * profile, otherwise a non-zero value will force a minimum BW limit for the VSI
+ * on TC 0.
+ */
+int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate)
+{
+ struct ice_pf *pf = vsi->back;
+ enum ice_status status;
+ struct device *dev;
+ int speed;
+
+ dev = ice_pf_to_dev(pf);
+ if (!vsi->port_info) {
+ dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n",
+ vsi->idx, vsi->type);
+ return -EINVAL;
+ }
+
+ speed = ice_get_link_speed_kbps(vsi);
+ if (min_tx_rate > (u64)speed) {
+ dev_err(dev, "invalid min Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n",
+ min_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx,
+ speed);
+ return -EINVAL;
+ }
+
+ /* Configure min BW for VSI limit */
+ if (min_tx_rate) {
+ status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0,
+ ICE_MIN_BW, min_tx_rate);
+ if (status) {
+ dev_err(dev, "failed to set min Tx rate(%llu Kbps) for %s %d\n",
+ min_tx_rate, ice_vsi_type_str(vsi->type),
+ vsi->idx);
+ return -EIO;
+ }
+
+ dev_dbg(dev, "set min Tx rate(%llu Kbps) for %s\n",
+ min_tx_rate, ice_vsi_type_str(vsi->type));
+ } else {
+ status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info,
+ vsi->idx, 0,
+ ICE_MIN_BW);
+ if (status) {
+ dev_err(dev, "failed to clear min Tx rate configuration for %s %d\n",
+ ice_vsi_type_str(vsi->type), vsi->idx);
+ return -EIO;
+ }
+
+ dev_dbg(dev, "cleared min Tx rate configuration for %s %d\n",
+ ice_vsi_type_str(vsi->type), vsi->idx);
+ }
+
+ return 0;
+}
+
+/**
+ * ice_set_max_bw_limit - setup maximum BW limit for Tx based on max_tx_rate
+ * @vsi: VSI to be configured
+ * @max_tx_rate: max Tx rate in Kbps to be configured as BW limit
+ *
+ * If the max_tx_rate is specified as 0 that means to clear the maximum BW limit
+ * profile, otherwise a non-zero value will force a maximum BW limit for the VSI
+ * on TC 0.
+ */
+int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate)
+{
+ struct ice_pf *pf = vsi->back;
+ enum ice_status status;
+ struct device *dev;
+ int speed;
+
+ dev = ice_pf_to_dev(pf);
+ if (!vsi->port_info) {
+ dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n",
+ vsi->idx, vsi->type);
+ return -EINVAL;
+ }
+
+ speed = ice_get_link_speed_kbps(vsi);
+ if (max_tx_rate > (u64)speed) {
+ dev_err(dev, "invalid max Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n",
+ max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx,
+ speed);
+ return -EINVAL;
+ }
+
+ /* Configure max BW for VSI limit */
+ if (max_tx_rate) {
+ status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0,
+ ICE_MAX_BW, max_tx_rate);
+ if (status) {
+ dev_err(dev, "failed setting max Tx rate(%llu Kbps) for %s %d\n",
+ max_tx_rate, ice_vsi_type_str(vsi->type),
+ vsi->idx);
+ return -EIO;
+ }
+
+ dev_dbg(dev, "set max Tx rate(%llu Kbps) for %s %d\n",
+ max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx);
+ } else {
+ status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info,
+ vsi->idx, 0,
+ ICE_MAX_BW);
+ if (status) {
+ dev_err(dev, "failed clearing max Tx rate configuration for %s %d\n",
+ ice_vsi_type_str(vsi->type), vsi->idx);
+ return -EIO;
+ }
+
+ dev_dbg(dev, "cleared max Tx rate configuration for %s %d\n",
+ ice_vsi_type_str(vsi->type), vsi->idx);
+ }
+
+ return 0;
+}
+
+/**
* ice_set_link - turn on/off physical link
* @vsi: VSI to modify physical link on
* @ena: turn on/off physical link
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index b6c429c5875d..c79fcbf82d8f 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -103,6 +103,7 @@ int ice_status_to_errno(enum ice_status err);
void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl);
void ice_write_itr(struct ice_ring_container *rc, u16 itr);
+void ice_set_q_vector_intrl(struct ice_q_vector *q_vector);
enum ice_status
ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set);
@@ -116,7 +117,9 @@ bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi);
int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi);
int ice_clear_dflt_vsi(struct ice_sw *sw);
-
+int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate);
+int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate);
+int ice_get_link_speed_mbps(struct ice_vsi *vsi);
int
ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *));
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index f531691a3e12..846623a97723 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5502,77 +5502,59 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
}
/* THEORY OF MODERATION:
- * The below code creates custom DIM profiles for use by this driver, because
- * the ice driver hardware works differently than the hardware that DIMLIB was
+ * The ice driver hardware works differently than the hardware that DIMLIB was
* originally made for. ice hardware doesn't have packet count limits that
* can trigger an interrupt, but it *does* have interrupt rate limit support,
- * and this code adds that capability to be used by the driver when it's using
- * DIMLIB. The DIMLIB code was always designed to be a suggestion to the driver
- * for how to "respond" to traffic and interrupts, so this driver uses a
- * slightly different set of moderation parameters to get best performance.
+ * which is hard-coded to a limit of 250,000 ints/second.
+ * If not using dynamic moderation, the INTRL value can be modified
+ * by ethtool rx-usecs-high.
*/
struct ice_dim {
/* the throttle rate for interrupts, basically worst case delay before
* an initial interrupt fires, value is stored in microseconds.
*/
u16 itr;
- /* the rate limit for interrupts, which can cap a delay from a small
- * ITR at a certain amount of interrupts per second. f.e. a 2us ITR
- * could yield as much as 500,000 interrupts per second, but with a
- * 10us rate limit, it limits to 100,000 interrupts per second. Value
- * is stored in microseconds.
- */
- u16 intrl;
};
/* Make a different profile for Rx that doesn't allow quite so aggressive
- * moderation at the high end (it maxes out at 128us or about 8k interrupts a
- * second. The INTRL/rate parameters here are only useful to cap small ITR
- * values, which is why for larger ITR's - like 128, which can only generate
- * 8k interrupts per second, there is no point to rate limit and the values
- * are set to zero. The rate limit values do affect latency, and so must
- * be reasonably small so to not impact latency sensitive tests.
+ * moderation at the high end (it maxes out at 126us or about 8k interrupts a
+ * second.
*/
static const struct ice_dim rx_profile[] = {
- {2, 10},
- {8, 16},
- {32, 0},
- {96, 0},
- {128, 0}
+ {2}, /* 500,000 ints/s, capped at 250K by INTRL */
+ {8}, /* 125,000 ints/s */
+ {16}, /* 62,500 ints/s */
+ {62}, /* 16,129 ints/s */
+ {126} /* 7,936 ints/s */
};
/* The transmit profile, which has the same sorts of values
* as the previous struct
*/
static const struct ice_dim tx_profile[] = {
- {2, 10},
- {8, 16},
- {64, 0},
- {128, 0},
- {256, 0}
+ {2}, /* 500,000 ints/s, capped at 250K by INTRL */
+ {8}, /* 125,000 ints/s */
+ {40}, /* 16,125 ints/s */
+ {128}, /* 7,812 ints/s */
+ {256} /* 3,906 ints/s */
};
static void ice_tx_dim_work(struct work_struct *work)
{
struct ice_ring_container *rc;
- struct ice_q_vector *q_vector;
struct dim *dim;
- u16 itr, intrl;
+ u16 itr;
dim = container_of(work, struct dim, work);
- rc = container_of(dim, struct ice_ring_container, dim);
- q_vector = container_of(rc, struct ice_q_vector, tx);
+ rc = (struct ice_ring_container *)dim->priv;
- if (dim->profile_ix >= ARRAY_SIZE(tx_profile))
- dim->profile_ix = ARRAY_SIZE(tx_profile) - 1;
+ WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile));
/* look up the values in our local table */
itr = tx_profile[dim->profile_ix].itr;
- intrl = tx_profile[dim->profile_ix].intrl;
- ice_trace(tx_dim_work, q_vector, dim);
+ ice_trace(tx_dim_work, container_of(rc, struct ice_q_vector, tx), dim);
ice_write_itr(rc, itr);
- ice_write_intrl(q_vector, intrl);
dim->state = DIM_START_MEASURE;
}
@@ -5580,28 +5562,65 @@ static void ice_tx_dim_work(struct work_struct *work)
static void ice_rx_dim_work(struct work_struct *work)
{
struct ice_ring_container *rc;
- struct ice_q_vector *q_vector;
struct dim *dim;
- u16 itr, intrl;
+ u16 itr;
dim = container_of(work, struct dim, work);
- rc = container_of(dim, struct ice_ring_container, dim);
- q_vector = container_of(rc, struct ice_q_vector, rx);
+ rc = (struct ice_ring_container *)dim->priv;
- if (dim->profile_ix >= ARRAY_SIZE(rx_profile))
- dim->profile_ix = ARRAY_SIZE(rx_profile) - 1;
+ WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile));
/* look up the values in our local table */
itr = rx_profile[dim->profile_ix].itr;
- intrl = rx_profile[dim->profile_ix].intrl;
- ice_trace(rx_dim_work, q_vector, dim);
+ ice_trace(rx_dim_work, container_of(rc, struct ice_q_vector, rx), dim);
ice_write_itr(rc, itr);
- ice_write_intrl(q_vector, intrl);
dim->state = DIM_START_MEASURE;
}
+#define ICE_DIM_DEFAULT_PROFILE_IX 1
+
+/**
+ * ice_init_moderation - set up interrupt moderation
+ * @q_vector: the vector containing rings to be configured
+ *
+ * Set up interrupt moderation registers, with the intent to do the right thing
+ * when called from reset or from probe, and whether or not dynamic moderation
+ * is enabled or not. Take special care to write all the registers in both
+ * dynamic moderation mode or not in order to make sure hardware is in a known
+ * state.
+ */
+static void ice_init_moderation(struct ice_q_vector *q_vector)
+{
+ struct ice_ring_container *rc;
+ bool tx_dynamic, rx_dynamic;
+
+ rc = &q_vector->tx;
+ INIT_WORK(&rc->dim.work, ice_tx_dim_work);
+ rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
+ rc->dim.priv = rc;
+ tx_dynamic = ITR_IS_DYNAMIC(rc);
+
+ /* set the initial TX ITR to match the above */
+ ice_write_itr(rc, tx_dynamic ?
+ tx_profile[rc->dim.profile_ix].itr : rc->itr_setting);
+
+ rc = &q_vector->rx;
+ INIT_WORK(&rc->dim.work, ice_rx_dim_work);
+ rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
+ rc->dim.priv = rc;
+ rx_dynamic = ITR_IS_DYNAMIC(rc);
+
+ /* set the initial RX ITR to match the above */
+ ice_write_itr(rc, rx_dynamic ? rx_profile[rc->dim.profile_ix].itr :
+ rc->itr_setting);
+
+ ice_set_q_vector_intrl(q_vector);
+}
+
/**
* ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
* @vsi: the VSI being configured
@@ -5616,11 +5635,7 @@ static void ice_napi_enable_all(struct ice_vsi *vsi)
ice_for_each_q_vector(vsi, q_idx) {
struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
- INIT_WORK(&q_vector->tx.dim.work, ice_tx_dim_work);
- q_vector->tx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
-
- INIT_WORK(&q_vector->rx.dim.work, ice_rx_dim_work);
- q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ ice_init_moderation(q_vector);
if (q_vector->rx.rx_ring || q_vector->tx.tx_ring)
napi_enable(&q_vector->napi);
@@ -7390,6 +7405,7 @@ static const struct net_device_ops ice_netdev_ops = {
.ndo_set_vf_vlan = ice_set_vf_port_vlan,
.ndo_set_vf_link_state = ice_set_vf_link_state,
.ndo_get_vf_stats = ice_get_vf_stats,
+ .ndo_set_vf_rate = ice_set_vf_bw,
.ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
.ndo_setup_tc = ice_setup_tc,
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index 9f07b6641705..560e52b99f83 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -3771,6 +3771,136 @@ ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
}
/**
+ * ice_sched_get_node_by_id_type - get node from ID type
+ * @pi: port information structure
+ * @id: identifier
+ * @agg_type: type of aggregator
+ * @tc: traffic class
+ *
+ * This function returns node identified by ID of type aggregator, and
+ * based on traffic class (TC). This function needs to be called with
+ * the scheduler lock held.
+ */
+static struct ice_sched_node *
+ice_sched_get_node_by_id_type(struct ice_port_info *pi, u32 id,
+ enum ice_agg_type agg_type, u8 tc)
+{
+ struct ice_sched_node *node = NULL;
+
+ switch (agg_type) {
+ case ICE_AGG_TYPE_VSI: {
+ struct ice_vsi_ctx *vsi_ctx;
+ u16 vsi_handle = (u16)id;
+
+ if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+ break;
+ /* Get sched_vsi_info */
+ vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
+ if (!vsi_ctx)
+ break;
+ node = vsi_ctx->sched.vsi_node[tc];
+ break;
+ }
+
+ case ICE_AGG_TYPE_AGG: {
+ struct ice_sched_node *tc_node;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (tc_node)
+ node = ice_sched_get_agg_node(pi, tc_node, id);
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ return node;
+}
+
+/**
+ * ice_sched_set_node_bw_lmt_per_tc - set node BW limit per TC
+ * @pi: port information structure
+ * @id: ID (software VSI handle or AGG ID)
+ * @agg_type: aggregator type (VSI or AGG type node)
+ * @tc: traffic class
+ * @rl_type: min or max
+ * @bw: bandwidth in Kbps
+ *
+ * This function sets BW limit of VSI or Aggregator scheduling node
+ * based on TC information from passed in argument BW.
+ */
+static enum ice_status
+ice_sched_set_node_bw_lmt_per_tc(struct ice_port_info *pi, u32 id,
+ enum ice_agg_type agg_type, u8 tc,
+ enum ice_rl_type rl_type, u32 bw)
+{
+ enum ice_status status = ICE_ERR_PARAM;
+ struct ice_sched_node *node;
+
+ if (!pi)
+ return status;
+
+ if (rl_type == ICE_UNKNOWN_BW)
+ return status;
+
+ mutex_lock(&pi->sched_lock);
+ node = ice_sched_get_node_by_id_type(pi, id, agg_type, tc);
+ if (!node) {
+ ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong id, agg type, or tc\n");
+ goto exit_set_node_bw_lmt_per_tc;
+ }
+ if (bw == ICE_SCHED_DFLT_BW)
+ status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type);
+ else
+ status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw);
+
+exit_set_node_bw_lmt_per_tc:
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
+/**
+ * ice_cfg_vsi_bw_lmt_per_tc - configure VSI BW limit per TC
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: traffic class
+ * @rl_type: min or max
+ * @bw: bandwidth in Kbps
+ *
+ * This function configures BW limit of VSI scheduling node based on TC
+ * information.
+ */
+enum ice_status
+ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ enum ice_rl_type rl_type, u32 bw)
+{
+ return ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle,
+ ICE_AGG_TYPE_VSI,
+ tc, rl_type, bw);
+}
+
+/**
+ * ice_cfg_vsi_bw_dflt_lmt_per_tc - configure default VSI BW limit per TC
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: traffic class
+ * @rl_type: min or max
+ *
+ * This function configures default BW limit of VSI scheduling node based on TC
+ * information.
+ */
+enum ice_status
+ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ enum ice_rl_type rl_type)
+{
+ return ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle,
+ ICE_AGG_TYPE_VSI,
+ tc, rl_type,
+ ICE_SCHED_DFLT_BW);
+}
+
+/**
* ice_cfg_rl_burst_size - Set burst size value
* @hw: pointer to the HW struct
* @bytes: burst size in bytes
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
index 9beef8f0ec76..f89b80ba3499 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.h
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -103,6 +103,12 @@ ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
enum ice_status
ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
u16 q_handle, enum ice_rl_type rl_type);
+enum ice_status
+ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ enum ice_rl_type rl_type, u32 bw);
+enum ice_status
+ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ enum ice_rl_type rl_type);
enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes);
void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw);
void ice_sched_replay_agg(struct ice_hw *hw);
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 0d07547b40f1..a4a299012f9f 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -4783,7 +4783,14 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
s_rule = kzalloc(rule_buf_sz, GFP_KERNEL);
if (!s_rule)
return ICE_ERR_NO_MEMORY;
- act |= ICE_SINGLE_ACT_LB_ENABLE | ICE_SINGLE_ACT_LAN_ENABLE;
+ if (!rinfo->flags_info.act_valid) {
+ act |= ICE_SINGLE_ACT_LAN_ENABLE;
+ act |= ICE_SINGLE_ACT_LB_ENABLE;
+ } else {
+ act |= rinfo->flags_info.act & (ICE_SINGLE_ACT_LAN_ENABLE |
+ ICE_SINGLE_ACT_LB_ENABLE);
+ }
+
switch (rinfo->sw_act.fltr_act) {
case ICE_FWD_TO_VSI:
act |= (rinfo->sw_act.fwd_id.hw_vsi_id <<
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index 34b7f74b1ab8..d4c0a3b594af 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -160,11 +160,22 @@ struct ice_rule_query_data {
u16 vsi_handle;
};
+/* This structure allows to pass info about lb_en and lan_en
+ * flags to ice_add_adv_rule. Values in act would be used
+ * only if act_valid was set to true, otherwise default
+ * values would be used.
+ */
+struct ice_adv_rule_flags_info {
+ u32 act;
+ u8 act_valid; /* indicate if flags in act are valid */
+};
+
struct ice_adv_rule_info {
struct ice_sw_act_ctrl sw_act;
u32 priority;
u8 rx; /* true means LOOKUP_RX otherwise LOOKUP_TX */
u16 fltr_rule_id;
+ struct ice_adv_rule_flags_info flags_info;
};
/* A collection of one or more four word recipe */
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
index 4c1daa1a02a1..1dccfd116bc9 100644
--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
@@ -274,6 +274,8 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
rule_info.sw_act.flag |= ICE_FLTR_TX;
rule_info.sw_act.src = vsi->idx;
rule_info.rx = false;
+ rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE;
+ rule_info.flags_info.act_valid = true;
}
/* specify the cookie as filter_rule_id */
@@ -296,12 +298,6 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
fltr->rid = rule_added.rid;
fltr->rule_id = rule_added.rule_id;
- if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS) {
- if (ice_fltr_update_flags(vsi, fltr->rule_id, fltr->rid,
- ICE_SINGLE_ACT_LAN_ENABLE))
- ice_rem_adv_rule_by_id(hw, &rule_added);
- }
-
exit:
kfree(list);
return ret;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 01ae331927bd..bc3ba19dc88f 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -343,7 +343,7 @@ int ice_setup_tx_ring(struct ice_tx_ring *tx_ring)
/* warn if we are about to overwrite the pointer */
WARN_ON(tx_ring->tx_buf);
tx_ring->tx_buf =
- devm_kzalloc(dev, sizeof(*tx_ring->tx_buf) * tx_ring->count,
+ devm_kcalloc(dev, sizeof(*tx_ring->tx_buf), tx_ring->count,
GFP_KERNEL);
if (!tx_ring->tx_buf)
return -ENOMEM;
@@ -475,7 +475,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
/* warn if we are about to overwrite the pointer */
WARN_ON(rx_ring->rx_buf);
rx_ring->rx_buf =
- devm_kzalloc(dev, sizeof(*rx_ring->rx_buf) * rx_ring->count,
+ devm_kcalloc(dev, sizeof(*rx_ring->rx_buf), rx_ring->count,
GFP_KERNEL);
if (!rx_ring->rx_buf)
return -ENOMEM;
@@ -1259,6 +1259,41 @@ construct_skb:
return failure ? budget : (int)total_rx_pkts;
}
+static void __ice_update_sample(struct ice_q_vector *q_vector,
+ struct ice_ring_container *rc,
+ struct dim_sample *sample,
+ bool is_tx)
+{
+ u64 packets = 0, bytes = 0;
+
+ if (is_tx) {
+ struct ice_tx_ring *tx_ring;
+
+ ice_for_each_tx_ring(tx_ring, *rc) {
+ packets += tx_ring->stats.pkts;
+ bytes += tx_ring->stats.bytes;
+ }
+ } else {
+ struct ice_rx_ring *rx_ring;
+
+ ice_for_each_rx_ring(rx_ring, *rc) {
+ packets += rx_ring->stats.pkts;
+ bytes += rx_ring->stats.bytes;
+ }
+ }
+
+ dim_update_sample(q_vector->total_events, packets, bytes, sample);
+ sample->comp_ctr = 0;
+
+ /* if dim settings get stale, like when not updated for 1
+ * second or longer, force it to start again. This addresses the
+ * frequent case of an idle queue being switched to by the
+ * scheduler. The 1,000 here means 1,000 milliseconds.
+ */
+ if (ktime_ms_delta(sample->time, rc->dim.start_sample.time) >= 1000)
+ rc->dim.state = DIM_START_MEASURE;
+}
+
/**
* ice_net_dim - Update net DIM algorithm
* @q_vector: the vector associated with the interrupt
@@ -1274,34 +1309,16 @@ static void ice_net_dim(struct ice_q_vector *q_vector)
struct ice_ring_container *rx = &q_vector->rx;
if (ITR_IS_DYNAMIC(tx)) {
- struct dim_sample dim_sample = {};
- u64 packets = 0, bytes = 0;
- struct ice_tx_ring *ring;
-
- ice_for_each_tx_ring(ring, q_vector->tx) {
- packets += ring->stats.pkts;
- bytes += ring->stats.bytes;
- }
-
- dim_update_sample(q_vector->total_events, packets, bytes,
- &dim_sample);
+ struct dim_sample dim_sample;
+ __ice_update_sample(q_vector, tx, &dim_sample, true);
net_dim(&tx->dim, dim_sample);
}
if (ITR_IS_DYNAMIC(rx)) {
- struct dim_sample dim_sample = {};
- u64 packets = 0, bytes = 0;
- struct ice_rx_ring *ring;
-
- ice_for_each_rx_ring(ring, q_vector->rx) {
- packets += ring->stats.pkts;
- bytes += ring->stats.bytes;
- }
-
- dim_update_sample(q_vector->total_events, packets, bytes,
- &dim_sample);
+ struct dim_sample dim_sample;
+ __ice_update_sample(q_vector, rx, &dim_sample, false);
net_dim(&rx->dim, dim_sample);
}
}
@@ -1328,15 +1345,14 @@ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
}
/**
- * ice_update_ena_itr - Update ITR moderation and re-enable MSI-X interrupt
+ * ice_enable_interrupt - re-enable MSI-X interrupt
* @q_vector: the vector associated with the interrupt to enable
*
- * Update the net_dim() algorithm and re-enable the interrupt associated with
- * this vector.
- *
- * If the VSI is down, the interrupt will not be re-enabled.
+ * If the VSI is down, the interrupt will not be re-enabled. Also,
+ * when enabling the interrupt always reset the wb_on_itr to false
+ * and trigger a software interrupt to clean out internal state.
*/
-static void ice_update_ena_itr(struct ice_q_vector *q_vector)
+static void ice_enable_interrupt(struct ice_q_vector *q_vector)
{
struct ice_vsi *vsi = q_vector->vsi;
bool wb_en = q_vector->wb_on_itr;
@@ -1345,25 +1361,25 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector)
if (test_bit(ICE_DOWN, vsi->state))
return;
- /* When exiting WB_ON_ITR, let ITR resume its normal
- * interrupts-enabled path.
+ /* trigger an ITR delayed software interrupt when exiting busy poll, to
+ * make sure to catch any pending cleanups that might have been missed
+ * due to interrupt state transition. If busy poll or poll isn't
+ * enabled, then don't update ITR, and just enable the interrupt.
*/
- if (wb_en)
+ if (!wb_en) {
+ itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
+ } else {
q_vector->wb_on_itr = false;
- /* This will do nothing if dynamic updates are not enabled. */
- ice_net_dim(q_vector);
-
- /* net_dim() updates ITR out-of-band using a work item */
- itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
- /* trigger an immediate software interrupt when exiting
- * busy poll, to make sure to catch any pending cleanups
- * that might have been missed due to interrupt state
- * transition.
- */
- if (wb_en) {
+ /* do two things here with a single write. Set up the third ITR
+ * index to be used for software interrupt moderation, and then
+ * trigger a software interrupt with a rate limit of 20K on
+ * software interrupts, this will help avoid high interrupt
+ * loads due to frequently polling and exiting polling.
+ */
+ itr_val = ice_buildreg_itr(ICE_IDX_ITR2, ICE_ITR_20K);
itr_val |= GLINT_DYN_CTL_SWINT_TRIG_M |
- GLINT_DYN_CTL_SW_ITR_INDX_M |
+ ICE_IDX_ITR2 << GLINT_DYN_CTL_SW_ITR_INDX_S |
GLINT_DYN_CTL_SW_ITR_INDX_ENA_M;
}
wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
@@ -1482,10 +1498,12 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
/* Exit the polling mode, but don't re-enable interrupts if stack might
* poll us due to busy-polling
*/
- if (likely(napi_complete_done(napi, work_done)))
- ice_update_ena_itr(q_vector);
- else
+ if (likely(napi_complete_done(napi, work_done))) {
+ ice_net_dim(q_vector);
+ ice_enable_interrupt(q_vector);
+ } else {
ice_set_wb_on_itr(q_vector);
+ }
return min_t(int, work_done, budget - 1);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index d90a3b7be713..8e3f9ec4e35b 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -5,6 +5,7 @@
#include "ice_base.h"
#include "ice_lib.h"
#include "ice_fltr.h"
+#include "ice_dcb_lib.h"
#include "ice_flow.h"
#include "ice_eswitch.h"
#include "ice_virtchnl_allowlist.h"
@@ -885,6 +886,40 @@ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf)
}
/**
+ * ice_vf_rebuild_host_tx_rate_cfg - re-apply the Tx rate limiting configuration
+ * @vf: VF to re-apply the configuration for
+ *
+ * Called after a VF VSI has been re-added/rebuild during reset. The PF driver
+ * needs to re-apply the host configured Tx rate limiting configuration.
+ */
+static int ice_vf_rebuild_host_tx_rate_cfg(struct ice_vf *vf)
+{
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+ int err;
+
+ if (vf->min_tx_rate) {
+ err = ice_set_min_bw_limit(vsi, (u64)vf->min_tx_rate * 1000);
+ if (err) {
+ dev_err(dev, "failed to set min Tx rate to %d Mbps for VF %u, error %d\n",
+ vf->min_tx_rate, vf->vf_id, err);
+ return err;
+ }
+ }
+
+ if (vf->max_tx_rate) {
+ err = ice_set_max_bw_limit(vsi, (u64)vf->max_tx_rate * 1000);
+ if (err) {
+ dev_err(dev, "failed to set max Tx rate to %d Mbps for VF %u, error %d\n",
+ vf->max_tx_rate, vf->vf_id, err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+/**
* ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN
* @vf: VF to add MAC filters for
*
@@ -1420,6 +1455,11 @@ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
if (ice_vf_rebuild_host_vlan_cfg(vf))
dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n",
vf->vf_id);
+
+ if (ice_vf_rebuild_host_tx_rate_cfg(vf))
+ dev_err(dev, "failed to rebuild Tx rate limiting configuration for VF %u\n",
+ vf->vf_id);
+
/* rebuild aggregator node config for main VF VSI */
ice_vf_rebuild_aggregator_node_cfg(vsi);
}
@@ -1975,7 +2015,8 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
clear_bit(ICE_VF_DIS, pf->state);
- if (ice_eswitch_configure(pf))
+ ret = ice_eswitch_configure(pf);
+ if (ret)
goto err_unroll_sriov;
return 0;
@@ -4747,8 +4788,8 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE;
else
ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE;
- ivi->max_tx_rate = vf->tx_rate;
- ivi->min_tx_rate = 0;
+ ivi->max_tx_rate = vf->max_tx_rate;
+ ivi->min_tx_rate = vf->min_tx_rate;
return 0;
}
@@ -4799,11 +4840,6 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
struct ice_vf *vf;
int ret;
- if (ice_is_eswitch_mode_switchdev(pf)) {
- dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n");
- return -EOPNOTSUPP;
- }
-
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
@@ -4863,6 +4899,11 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
struct ice_vf *vf;
int ret;
+ if (ice_is_eswitch_mode_switchdev(pf)) {
+ dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n");
+ return -EOPNOTSUPP;
+ }
+
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
@@ -4927,6 +4968,122 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
}
/**
+ * ice_calc_all_vfs_min_tx_rate - calculate cumulative min Tx rate on all VFs
+ * @pf: PF associated with VFs
+ */
+static int ice_calc_all_vfs_min_tx_rate(struct ice_pf *pf)
+{
+ int rate = 0, i;
+
+ ice_for_each_vf(pf, i)
+ rate += pf->vf[i].min_tx_rate;
+
+ return rate;
+}
+
+/**
+ * ice_min_tx_rate_oversubscribed - check if min Tx rate causes oversubscription
+ * @vf: VF trying to configure min_tx_rate
+ * @min_tx_rate: min Tx rate in Mbps
+ *
+ * Check if the min_tx_rate being passed in will cause oversubscription of total
+ * min_tx_rate based on the current link speed and all other VFs configured
+ * min_tx_rate
+ *
+ * Return true if the passed min_tx_rate would cause oversubscription, else
+ * return false
+ */
+static bool
+ice_min_tx_rate_oversubscribed(struct ice_vf *vf, int min_tx_rate)
+{
+ int link_speed_mbps = ice_get_link_speed_mbps(ice_get_vf_vsi(vf));
+ int all_vfs_min_tx_rate = ice_calc_all_vfs_min_tx_rate(vf->pf);
+
+ /* this VF's previous rate is being overwritten */
+ all_vfs_min_tx_rate -= vf->min_tx_rate;
+
+ if (all_vfs_min_tx_rate + min_tx_rate > link_speed_mbps) {
+ dev_err(ice_pf_to_dev(vf->pf), "min_tx_rate of %d Mbps on VF %u would cause oversubscription of %d Mbps based on the current link speed %d Mbps\n",
+ min_tx_rate, vf->vf_id,
+ all_vfs_min_tx_rate + min_tx_rate - link_speed_mbps,
+ link_speed_mbps);
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * ice_set_vf_bw - set min/max VF bandwidth
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @min_tx_rate: Minimum Tx rate in Mbps
+ * @max_tx_rate: Maximum Tx rate in Mbps
+ */
+int
+ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
+ int max_tx_rate)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_vsi *vsi;
+ struct device *dev;
+ struct ice_vf *vf;
+ int ret;
+
+ dev = ice_pf_to_dev(pf);
+ if (ice_validate_vf_id(pf, vf_id))
+ return -EINVAL;
+
+ vf = &pf->vf[vf_id];
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ return ret;
+
+ vsi = ice_get_vf_vsi(vf);
+
+ /* when max_tx_rate is zero that means no max Tx rate limiting, so only
+ * check if max_tx_rate is non-zero
+ */
+ if (max_tx_rate && min_tx_rate > max_tx_rate) {
+ dev_err(dev, "Cannot set min Tx rate %d Mbps greater than max Tx rate %d Mbps\n",
+ min_tx_rate, max_tx_rate);
+ return -EINVAL;
+ }
+
+ if (min_tx_rate && ice_is_dcb_active(pf)) {
+ dev_err(dev, "DCB on PF is currently enabled. VF min Tx rate limiting not allowed on this PF.\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate))
+ return -EINVAL;
+
+ if (vf->min_tx_rate != (unsigned int)min_tx_rate) {
+ ret = ice_set_min_bw_limit(vsi, (u64)min_tx_rate * 1000);
+ if (ret) {
+ dev_err(dev, "Unable to set min-tx-rate for VF %d\n",
+ vf->vf_id);
+ return ret;
+ }
+
+ vf->min_tx_rate = min_tx_rate;
+ }
+
+ if (vf->max_tx_rate != (unsigned int)max_tx_rate) {
+ ret = ice_set_max_bw_limit(vsi, (u64)max_tx_rate * 1000);
+ if (ret) {
+ dev_err(dev, "Unable to set max-tx-rate for VF %d\n",
+ vf->vf_id);
+ return ret;
+ }
+
+ vf->max_tx_rate = max_tx_rate;
+ }
+
+ return 0;
+}
+
+/**
* ice_get_vf_stats - populate some stats for the VF
* @netdev: the netdev of the PF
* @vf_id: the host OS identifier (0-255)
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 3115284e5411..5ff93a08f54c 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -125,7 +125,8 @@ struct ice_vf {
* the main LAN VSI for the PF.
*/
u16 lan_vsi_num; /* ID as used by firmware */
- unsigned int tx_rate; /* Tx bandwidth limit in Mbps */
+ unsigned int min_tx_rate; /* Minimum Tx bandwidth limit in Mbps */
+ unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */
DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */
u64 num_inval_msgs; /* number of continuous invalid msgs */
@@ -172,6 +173,10 @@ int
ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
__be16 vlan_proto);
+int
+ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
+ int max_tx_rate);
+
int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted);
int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state);
@@ -304,6 +309,14 @@ ice_set_vf_link_state(struct net_device __always_unused *netdev,
}
static inline int
+ice_set_vf_bw(struct net_device __always_unused *netdev,
+ int __always_unused vf_id, int __always_unused min_tx_rate,
+ int __always_unused max_tx_rate)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf,
struct ice_q_vector __always_unused *q_vector)
{