-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e.h               |   3
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_debugfs.c       |  12
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_ethtool.c       |  81
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_main.c          |  54
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.c          | 421
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.h          |  68
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_txrx.c        | 425
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_txrx.h        |  67
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf.h           |   3
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c   |  46
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf_main.c      |  22
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c  |  24
12 files changed, 769 insertions, 457 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 46e9f4e0a02c..ebe795a7f5f9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -824,6 +824,7 @@ struct i40e_q_vector {
struct i40e_ring_container rx;
struct i40e_ring_container tx;
+ u8 itr_countdown; /* when 0 should adjust adaptive ITR */
u8 num_ringpairs; /* total number of ring pairs in vector */
cpumask_t affinity_mask;
@@ -832,8 +833,6 @@ struct i40e_q_vector {
struct rcu_head rcu; /* to avoid race with update stats on free */
char name[I40E_INT_NAME_STR_LEN];
bool arm_wb_state;
-#define ITR_COUNTDOWN_START 100
- u8 itr_countdown; /* when 0 should adjust ITR */
} ____cacheline_internodealigned_in_smp;
/* lan device */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 4c3b4243cf65..e9fc51bd6c95 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -315,9 +315,9 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
i, rx_ring->vsi,
rx_ring->q_vector);
dev_info(&pf->pdev->dev,
- " rx_rings[%i]: rx_itr_setting = %d (%s)\n",
- i, rx_ring->rx_itr_setting,
- ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed");
+ " rx_rings[%i]: itr_setting = %d (%s)\n",
+ i, rx_ring->itr_setting,
+ ITR_IS_DYNAMIC(rx_ring->itr_setting) ? "dynamic" : "fixed");
}
for (i = 0; i < vsi->num_queue_pairs; i++) {
struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]);
@@ -366,9 +366,9 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
" tx_rings[%i]: DCB tc = %d\n",
i, tx_ring->dcb_tc);
dev_info(&pf->pdev->dev,
- " tx_rings[%i]: tx_itr_setting = %d (%s)\n",
- i, tx_ring->tx_itr_setting,
- ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed");
+ " tx_rings[%i]: itr_setting = %d (%s)\n",
+ i, tx_ring->itr_setting,
+ ITR_IS_DYNAMIC(tx_ring->itr_setting) ? "dynamic" : "fixed");
}
rcu_read_unlock();
dev_info(&pf->pdev->dev,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 2f5bee713fef..29a7412b2fa6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2244,14 +2244,14 @@ static int __i40e_get_coalesce(struct net_device *netdev,
rx_ring = vsi->rx_rings[queue];
tx_ring = vsi->tx_rings[queue];
- if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
+ if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
ec->use_adaptive_rx_coalesce = 1;
- if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
+ if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
ec->use_adaptive_tx_coalesce = 1;
- ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
- ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+ ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
+ ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
/* we use the _usecs_high to store/set the interrupt rate limit
* that the hardware supports, that almost but not quite
@@ -2311,34 +2311,35 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
struct i40e_pf *pf = vsi->back;
struct i40e_hw *hw = &pf->hw;
struct i40e_q_vector *q_vector;
- u16 vector, intrl;
+ u16 intrl;
intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit);
- rx_ring->rx_itr_setting = ec->rx_coalesce_usecs;
- tx_ring->tx_itr_setting = ec->tx_coalesce_usecs;
+ rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+ tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
if (ec->use_adaptive_rx_coalesce)
- rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC;
+ rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
else
- rx_ring->rx_itr_setting &= ~I40E_ITR_DYNAMIC;
+ rx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
if (ec->use_adaptive_tx_coalesce)
- tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC;
+ tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
else
- tx_ring->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
+ tx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
q_vector = rx_ring->q_vector;
- q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
- vector = vsi->base_vector + q_vector->v_idx;
- wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
+ q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
q_vector = tx_ring->q_vector;
- q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
- vector = vsi->base_vector + q_vector->v_idx;
- wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
+ q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
- wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl);
+ /* The interrupt handler itself will take care of programming
+ * the Tx and Rx ITR values based on the values we have entered
+ * into the q_vector, no need to write the values now.
+ */
+
+ wr32(hw, I40E_PFINT_RATEN(q_vector->reg_idx), intrl);
i40e_flush(hw);
}
@@ -2364,11 +2365,11 @@ static int __i40e_set_coalesce(struct net_device *netdev,
vsi->work_limit = ec->tx_max_coalesced_frames_irq;
if (queue < 0) {
- cur_rx_itr = vsi->rx_rings[0]->rx_itr_setting;
- cur_tx_itr = vsi->tx_rings[0]->tx_itr_setting;
+ cur_rx_itr = vsi->rx_rings[0]->itr_setting;
+ cur_tx_itr = vsi->tx_rings[0]->itr_setting;
} else if (queue < vsi->num_queue_pairs) {
- cur_rx_itr = vsi->rx_rings[queue]->rx_itr_setting;
- cur_tx_itr = vsi->tx_rings[queue]->tx_itr_setting;
+ cur_rx_itr = vsi->rx_rings[queue]->itr_setting;
+ cur_tx_itr = vsi->tx_rings[queue]->itr_setting;
} else {
netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
vsi->num_queue_pairs - 1);
@@ -2396,7 +2397,7 @@ static int __i40e_set_coalesce(struct net_device *netdev,
return -EINVAL;
}
- if (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)) {
+ if (ec->rx_coalesce_usecs > I40E_MAX_ITR) {
netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
return -EINVAL;
}
@@ -2407,16 +2408,16 @@ static int __i40e_set_coalesce(struct net_device *netdev,
return -EINVAL;
}
- if (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)) {
+ if (ec->tx_coalesce_usecs > I40E_MAX_ITR) {
netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
return -EINVAL;
}
if (ec->use_adaptive_rx_coalesce && !cur_rx_itr)
- ec->rx_coalesce_usecs = I40E_MIN_ITR << 1;
+ ec->rx_coalesce_usecs = I40E_MIN_ITR;
if (ec->use_adaptive_tx_coalesce && !cur_tx_itr)
- ec->tx_coalesce_usecs = I40E_MIN_ITR << 1;
+ ec->tx_coalesce_usecs = I40E_MIN_ITR;
intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high);
vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg);
@@ -4406,6 +4407,8 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
}
flags_complete:
+ changed_flags = orig_flags ^ new_flags;
+
/* Before we finalize any flag changes, we need to perform some
* checks to ensure that the changes are supported and safe.
*/
@@ -4415,13 +4418,17 @@ flags_complete:
!(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE))
return -EOPNOTSUPP;
- /* Disable FW LLDP not supported if NPAR active or if FW
- * API version < 1.7
+ /* If the driver detected FW LLDP was disabled on init, this flag could
+ * be set, however we do not support _changing_ the flag if NPAR is
+ * enabled or FW API version < 1.7. There are situations where older
+ * FW versions/NPAR enabled PFs could disable LLDP, however we _must_
+ * not allow the user to enable/disable LLDP with this flag on
+ * unsupported FW versions.
*/
- if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) {
+ if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
if (pf->hw.func_caps.npar_enable) {
dev_warn(&pf->pdev->dev,
- "Unable to stop FW LLDP if NPAR active\n");
+ "Unable to change FW LLDP if NPAR active\n");
return -EOPNOTSUPP;
}
@@ -4429,7 +4436,7 @@ flags_complete:
(pf->hw.aq.api_maj_ver == 1 &&
pf->hw.aq.api_min_ver < 7)) {
dev_warn(&pf->pdev->dev,
- "FW ver does not support stopping FW LLDP\n");
+ "FW ver does not support changing FW LLDP\n");
return -EOPNOTSUPP;
}
}
@@ -4439,6 +4446,10 @@ flags_complete:
* something else has modified the flags variable since we copied it
* originally. We'll just punt with an error and log something in the
* message buffer.
+ *
+ * This is the point of no return for this function. We need to have
+ * checked any discrepancies or misconfigurations and returned
+ * EOPNOTSUPP before updating pf->flags here.
*/
if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
dev_warn(&pf->pdev->dev,
@@ -4446,8 +4457,6 @@ flags_complete:
return -EAGAIN;
}
- changed_flags = orig_flags ^ new_flags;
-
/* Process any additional changes needed as a result of flag changes.
* The changed_flags value reflects the list of bits that were
* changed in the code above.
@@ -4479,6 +4488,12 @@ flags_complete:
}
}
+ if ((changed_flags & pf->flags &
+ I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
+ (pf->flags & I40E_FLAG_MFP_ENABLED))
+ dev_warn(&pf->pdev->dev,
+ "Turning on link-down-on-close flag may affect other partitions\n");
+
if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) {
struct i40e_dcbx_config *dcbcfg;
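The cmpxchg64() check earlier in this file is what makes the flag update safe against concurrent writers: the new flags word is computed from a snapshot, and the swap only succeeds if the snapshot is still current. A minimal sketch of that pattern, with a hypothetical flags word and helper name that are not taken from the driver:

/* Hypothetical sketch of the read-modify-cmpxchg pattern used in
 * i40e_set_priv_flags(); the function name is illustrative only.
 */
static int update_flags_once(u64 *flags, u64 set, u64 clear)
{
	u64 orig = READ_ONCE(*flags);
	u64 new = (orig | set) & ~clear;

	/* Fail if another writer changed *flags since we sampled it,
	 * mirroring the -EAGAIN path in i40e_set_priv_flags().
	 */
	if (cmpxchg64(flags, orig, new) != orig)
		return -EAGAIN;
	return 0;
}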
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index e31adbc75f9c..70ecd9c3a163 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3449,15 +3449,20 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
struct i40e_q_vector *q_vector = vsi->q_vectors[i];
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
- q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting);
- q_vector->rx.latency_range = I40E_LOW_LATENCY;
+ q_vector->rx.next_update = jiffies + 1;
+ q_vector->rx.target_itr =
+ ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
- q_vector->rx.itr);
- q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting);
- q_vector->tx.latency_range = I40E_LOW_LATENCY;
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+
+ q_vector->tx.next_update = jiffies + 1;
+ q_vector->tx.target_itr =
+ ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
- q_vector->tx.itr);
+ q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
+
wr32(hw, I40E_PFINT_RATEN(vector - 1),
i40e_intrl_usec_to_reg(vsi->int_rate_limit));
@@ -3558,13 +3563,14 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
u32 val;
/* set the ITR configuration */
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
- q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting);
- q_vector->rx.latency_range = I40E_LOW_LATENCY;
- wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr);
- q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting);
- q_vector->tx.latency_range = I40E_LOW_LATENCY;
- wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr);
+ q_vector->rx.next_update = jiffies + 1;
+ q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
+ wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->tx.next_update = jiffies + 1;
+ q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
+ wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
i40e_enable_misc_int_causes(pf);
@@ -9215,6 +9221,17 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
}
i40e_get_oem_version(&pf->hw);
+ if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
+ ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) ||
+ hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) {
+ /* The following delay is necessary for firmware 4.33 and older
+ * to recover after an EMP reset. 200 ms should suffice, but we
+ * use 300 ms to be sure that the firmware is ready to operate
+ * after the reset.
+ */
+ mdelay(300);
+ }
+
/* re-verify the eeprom if we just had an EMP reset */
if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state))
i40e_verify_eeprom(pf);
@@ -10018,7 +10035,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
ring->dcb_tc = 0;
if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
- ring->tx_itr_setting = pf->tx_itr_default;
+ ring->itr_setting = pf->tx_itr_default;
vsi->tx_rings[i] = ring++;
if (!i40e_enabled_xdp_vsi(vsi))
@@ -10036,7 +10053,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
set_ring_xdp(ring);
- ring->tx_itr_setting = pf->tx_itr_default;
+ ring->itr_setting = pf->tx_itr_default;
vsi->xdp_rings[i] = ring++;
setup_rx:
@@ -10049,7 +10066,7 @@ setup_rx:
ring->count = vsi->num_desc;
ring->size = 0;
ring->dcb_tc = 0;
- ring->rx_itr_setting = pf->rx_itr_default;
+ ring->itr_setting = pf->rx_itr_default;
vsi->rx_rings[i] = ring;
}
@@ -10328,9 +10345,6 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
netif_napi_add(vsi->netdev, &q_vector->napi,
i40e_napi_poll, NAPI_POLL_WEIGHT);
- q_vector->rx.latency_range = I40E_LOW_LATENCY;
- q_vector->tx.latency_range = I40E_LOW_LATENCY;
-
/* tie q_vector and vsi together */
vsi->q_vectors[v_idx] = q_vector;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index e554aa6cf070..1ec9b1d8023d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -995,99 +995,241 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
}
}
+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
+ struct i40e_ring_container *rc)
+{
+ return &q_vector->rx == rc;
+}
+
+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
+{
+ unsigned int divisor;
+
+ switch (q_vector->vsi->back->hw.phy.link_info.link_speed) {
+ case I40E_LINK_SPEED_40GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
+ break;
+ case I40E_LINK_SPEED_25GB:
+ case I40E_LINK_SPEED_20GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
+ break;
+ default:
+ case I40E_LINK_SPEED_10GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
+ break;
+ case I40E_LINK_SPEED_1GB:
+ case I40E_LINK_SPEED_100MB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
+ break;
+ }
+
+ return divisor;
+}
+
/**
- * i40e_set_new_dynamic_itr - Find new ITR level
+ * i40e_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
* @rc: structure containing ring performance data
*
- * Returns true if ITR changed, false if not
- *
- * Stores a new ITR value based on packets and byte counts during
- * the last interrupt. The advantage of per interrupt computation
- * is faster updates and more accurate ITR for the current traffic
- * pattern. Constants in this function were computed based on
- * theoretical maximum wire speed and thresholds were set based on
- * testing data as well as attempting to minimize response time
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt. The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
* while increasing bulk throughput.
**/
-static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+static void i40e_update_itr(struct i40e_q_vector *q_vector,
+ struct i40e_ring_container *rc)
{
- enum i40e_latency_range new_latency_range = rc->latency_range;
- u32 new_itr = rc->itr;
- int bytes_per_usec;
- unsigned int usecs, estimated_usecs;
+ unsigned int avg_wire_size, packets, bytes, itr;
+ unsigned long next_update = jiffies;
- if (rc->total_packets == 0 || !rc->itr)
- return false;
+ /* If we don't have any rings just leave ourselves set for maximum
+ * possible latency so we take ourselves out of the equation.
+ */
+ if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
+ return;
- usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
- bytes_per_usec = rc->total_bytes / usecs;
+ /* For Rx we want to push the delay up and default to low latency.
+ * for Tx we want to pull the delay down and default to high latency.
+ */
+ itr = i40e_container_is_rx(q_vector, rc) ?
+ I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
+ I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
+
+ /* If we didn't update within the last 1 - 2 jiffies we can
+ * assume that either packets are coming in so slowly there
+ * hasn't been any work, or that there is so much work that NAPI
+ * is dealing with interrupt moderation and we don't need to do
+ * anything.
+ */
+ if (time_after(next_update, rc->next_update))
+ goto clear_counts;
+
+ /* If itr_countdown is set it means we programmed an ITR within
+ * the last 4 interrupt cycles. This has a side effect of us
+ * potentially firing an early interrupt. In order to work around
+ * this we need to throw out any data received for a few
+ * interrupts following the update.
+ */
+ if (q_vector->itr_countdown) {
+ itr = rc->target_itr;
+ goto clear_counts;
+ }
+
+ packets = rc->total_packets;
+ bytes = rc->total_bytes;
- /* The calculations in this algorithm depend on interrupts actually
- * firing at the ITR rate. This may not happen if the packet rate is
- * really low, or if we've been napi polling. Check to make sure
- * that's not the case before we continue.
+ if (i40e_container_is_rx(q_vector, rc)) {
+ /* If this is Rx and there are 1 to 4 packets and fewer than
+ * 9000 bytes, assume insufficient data to use the bulk rate
+ * limiting approach unless Tx is already in bulk rate limiting.
+ * We are likely latency driven.
+ */
+ if (packets && packets < 4 && bytes < 9000 &&
+ (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
+ itr = I40E_ITR_ADAPTIVE_LATENCY;
+ goto adjust_by_size;
+ }
+ } else if (packets < 4) {
+ /* If both Tx and Rx ITR are maxed out, Tx ITR is running in
+ * bulk mode, and we are receiving 4 or fewer packets, just reset
+ * the ITR_ADAPTIVE_LATENCY bit for latency mode so that the Rx
+ * side can relax.
+ */
+ if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
+ (q_vector->rx.target_itr & I40E_ITR_MASK) ==
+ I40E_ITR_ADAPTIVE_MAX_USECS)
+ goto clear_counts;
+ } else if (packets > 32) {
+ /* If we have processed over 32 packets in a single interrupt
+ * for Tx assume we need to switch over to "bulk" mode.
+ */
+ rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
+ }
+
+ /* We have no packets to actually measure against. This means
+ * either one of the other queues on this vector is active or
+ * we are a Tx queue doing TSO with too high of an interrupt rate.
+ *
+ * Between 4 and 56 we can assume that our current interrupt delay
+ * is only slightly too low. As such we should increase it by a small
+ * fixed amount.
*/
- estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);
- if (estimated_usecs > usecs) {
- new_latency_range = I40E_LOW_LATENCY;
- goto reset_latency;
+ if (packets < 56) {
+ itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
+ if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= I40E_ITR_ADAPTIVE_LATENCY;
+ itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+ }
+ goto clear_counts;
}
- /* simple throttlerate management
- * 0-10MB/s lowest (50000 ints/s)
- * 10-20MB/s low (20000 ints/s)
- * 20-1249MB/s bulk (18000 ints/s)
+ if (packets <= 256) {
+ itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+ itr &= I40E_ITR_MASK;
+
+ /* Between 56 and 112 is our "goldilocks" zone where we are
+ * working out "just right". Just report that our current
+ * ITR is good for us.
+ */
+ if (packets <= 112)
+ goto clear_counts;
+
+ /* If packet count is 128 or greater we are likely looking
+ * at a slight overrun of the delay we want. Try halving
+ * our delay to see if that will cut the number of packets
+ * in half per interrupt.
+ */
+ itr /= 2;
+ itr &= I40E_ITR_MASK;
+ if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
+ itr = I40E_ITR_ADAPTIVE_MIN_USECS;
+
+ goto clear_counts;
+ }
+
+ /* The paths below assume we are dealing with a bulk ITR since
+ * number of packets is greater than 256. We are just going to have
+ * to compute a value and try to bring the count under control,
+ * though for smaller packet sizes there isn't much we can do as
+ * NAPI polling will likely be kicking in sooner rather than later.
+ */
+ itr = I40E_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+ /* If packet counts are 256 or greater we can assume we have a gross
+ * overestimation of what the rate should be. Instead of trying to
+ * fine tune it, just use the formula below to try and dial in an
+ * exact value given the current packet size of the frame.
+ */
+ avg_wire_size = bytes / packets;
+
+ /* The following is a crude approximation of:
+ * wmem_default / (size + overhead) = desired_pkts_per_int
+ * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+ * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
*
- * The math works out because the divisor is in 10^(-6) which
- * turns the bytes/us input value into MB/s values, but
- * make sure to use usecs, as the register values written
- * are in 2 usec increments in the ITR registers, and make sure
- * to use the smoothed values that the countdown timer gives us.
+ * Assuming wmem_default is 212992 and overhead is 640 bytes per
+ * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+ * formula down to
+ *
+ * (170 * (size + 24)) / (size + 640) = ITR
+ *
+ * We first do some math on the packet size and then finally bitshift
+ * by 8 after rounding up. We also have to account for PCIe link speed
+ * difference as ITR scales based on this.
*/
- switch (new_latency_range) {
- case I40E_LOWEST_LATENCY:
- if (bytes_per_usec > 10)
- new_latency_range = I40E_LOW_LATENCY;
- break;
- case I40E_LOW_LATENCY:
- if (bytes_per_usec > 20)
- new_latency_range = I40E_BULK_LATENCY;
- else if (bytes_per_usec <= 10)
- new_latency_range = I40E_LOWEST_LATENCY;
- break;
- case I40E_BULK_LATENCY:
- default:
- if (bytes_per_usec <= 20)
- new_latency_range = I40E_LOW_LATENCY;
- break;
+ if (avg_wire_size <= 60) {
+ /* Start at 250k ints/sec */
+ avg_wire_size = 4096;
+ } else if (avg_wire_size <= 380) {
+ /* 250K ints/sec to 60K ints/sec */
+ avg_wire_size *= 40;
+ avg_wire_size += 1696;
+ } else if (avg_wire_size <= 1084) {
+ /* 60K ints/sec to 36K ints/sec */
+ avg_wire_size *= 15;
+ avg_wire_size += 11452;
+ } else if (avg_wire_size <= 1980) {
+ /* 36K ints/sec to 30K ints/sec */
+ avg_wire_size *= 5;
+ avg_wire_size += 22420;
+ } else {
+ /* plateau at a limit of 30K ints/sec */
+ avg_wire_size = 32256;
}
-reset_latency:
- rc->latency_range = new_latency_range;
+ /* If we are in low latency mode halve our delay which doubles the
+ * rate to somewhere between 100K and 16K ints/sec
+ */
+ if (itr & I40E_ITR_ADAPTIVE_LATENCY)
+ avg_wire_size /= 2;
- switch (new_latency_range) {
- case I40E_LOWEST_LATENCY:
- new_itr = I40E_ITR_50K;
- break;
- case I40E_LOW_LATENCY:
- new_itr = I40E_ITR_20K;
- break;
- case I40E_BULK_LATENCY:
- new_itr = I40E_ITR_18K;
- break;
- default:
- break;
+ /* Resultant value is 256 times larger than it needs to be. This
+ * gives us room to adjust the value as needed to either increase
+ * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+ *
+ * Use addition as we have already recorded the new latency flag
+ * for the ITR value.
+ */
+ itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
+ I40E_ITR_ADAPTIVE_MIN_INC;
+
+ if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= I40E_ITR_ADAPTIVE_LATENCY;
+ itr += I40E_ITR_ADAPTIVE_MAX_USECS;
}
+clear_counts:
+ /* write back value */
+ rc->target_itr = itr;
+
+ /* next update should occur within next jiffy */
+ rc->next_update = next_update + 1;
+
rc->total_bytes = 0;
rc->total_packets = 0;
- rc->last_itr_update = jiffies;
-
- if (new_itr != rc->itr) {
- rc->itr = new_itr;
- return true;
- }
- return false;
}
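To make the piecewise scaling above concrete, here is one worked pass through the adjust_by_size math, assuming a 40GB link, bulk mode, and 1500-byte frames. The helper below is illustrative only and is not part of the patch:

/* Illustrative replay of the adjust_by_size math in i40e_update_itr()
 * for one assumed case: 40GB link, bulk mode, 1500-byte frames.
 */
static unsigned int example_adjust_by_size(void)
{
	unsigned int avg_wire_size = 1500;
	/* I40E_ITR_ADAPTIVE_MIN_INC * 1024 for I40E_LINK_SPEED_40GB */
	unsigned int divisor = 2 * 1024;
	unsigned int itr = 0;	/* I40E_ITR_ADAPTIVE_BULK */

	/* 1084 < 1500 <= 1980: the "36K ints/sec to 30K ints/sec" branch */
	avg_wire_size = avg_wire_size * 5 + 22420;	/* = 29920 */

	/* bulk mode, so no halving for low latency */

	/* DIV_ROUND_UP(29920, 2048) = 15, times MIN_INC (2) = 30 usecs,
	 * i.e. roughly 33K ints/sec
	 */
	itr += ((avg_wire_size + divisor - 1) / divisor) * 2;

	return itr;
}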
/**
@@ -1991,7 +2133,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
* @rx_buffer: rx buffer to pull data from
*
* This function will clean up the contents of the rx_buffer. It will
- * either recycle the bufer or unmap it and free the associated resources.
+ * either recycle the buffer or unmap it and free the associated resources.
*/
static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer)
@@ -2274,29 +2416,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
return failure ? budget : (int)total_rx_packets;
}
-static u32 i40e_buildreg_itr(const int type, const u16 itr)
+static inline u32 i40e_buildreg_itr(const int type, u16 itr)
{
u32 val;
+ /* We don't bother with setting the CLEARPBA bit as the data sheet
+ * points out doing so is "meaningless since it was already
+ * auto-cleared". The auto-clearing happens when the interrupt is
+ * asserted.
+ *
+ * Hardware errata 28 also indicates that writing to a
+ * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
+ * an event in the PBA anyway, so we need to rely on the automask
+ * to hold pending events for us until the interrupt is re-enabled.
+ *
+ * The itr value is reported in microseconds, and the register
+ * value is recorded in 2 microsecond units. For this reason we
+ * only need to shift by the interval shift - 1 instead of the
+ * full value.
+ */
+ itr &= I40E_ITR_MASK;
+
val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
(type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
- (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
+ (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
return val;
}
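The "interval shift - 1" trick above can be sanity-checked in isolation: since the ITR value is kept in microseconds while the register counts 2-usec units, shifting by one less is the same as dividing by two and then shifting fully. A minimal sketch, with the shift value passed in rather than assumed:

/* Minimal check of the comment above: for any even itr value in
 * usecs, shifting by (shift - 1) equals converting to 2-usec
 * register units first and then shifting by the full interval shift.
 */
static int shift_identity_holds(unsigned int itr_usecs, unsigned int shift)
{
	return (itr_usecs << (shift - 1)) == ((itr_usecs / 2) << shift);
}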
/* a small macro to shorten up some long lines */
#define INTREG I40E_PFINT_DYN_CTLN
-static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
-{
- return vsi->rx_rings[idx]->rx_itr_setting;
-}
-static inline int get_tx_itr(struct i40e_vsi *vsi, int idx)
-{
- return vsi->tx_rings[idx]->tx_itr_setting;
-}
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
/**
* i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -2308,10 +2466,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
struct i40e_q_vector *q_vector)
{
struct i40e_hw *hw = &vsi->back->hw;
- bool rx = false, tx = false;
- u32 rxval, txval;
- int idx = q_vector->v_idx;
- int rx_itr_setting, tx_itr_setting;
+ u32 intval;
/* If we don't have MSIX, then we only need to re-enable icr0 */
if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
@@ -2319,65 +2474,49 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
return;
}
- /* avoid dynamic calculation if in countdown mode OR if
- * all dynamic is disabled
- */
- rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
-
- rx_itr_setting = get_rx_itr(vsi, idx);
- tx_itr_setting = get_tx_itr(vsi, idx);
-
- if (q_vector->itr_countdown > 0 ||
- (!ITR_IS_DYNAMIC(rx_itr_setting) &&
- !ITR_IS_DYNAMIC(tx_itr_setting))) {
- goto enable_int;
- }
-
- if (ITR_IS_DYNAMIC(rx_itr_setting)) {
- rx = i40e_set_new_dynamic_itr(&q_vector->rx);
- rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
- }
-
- if (ITR_IS_DYNAMIC(tx_itr_setting)) {
- tx = i40e_set_new_dynamic_itr(&q_vector->tx);
- txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
- }
+ /* These will do nothing if dynamic updates are not enabled */
+ i40e_update_itr(q_vector, &q_vector->tx);
+ i40e_update_itr(q_vector, &q_vector->rx);
- if (rx || tx) {
- /* get the higher of the two ITR adjustments and
- * use the same value for both ITR registers
- * when in adaptive mode (Rx and/or Tx)
- */
- u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
-
- q_vector->tx.itr = q_vector->rx.itr = itr;
- txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
- tx = true;
- rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
- rx = true;
- }
-
- /* only need to enable the interrupt once, but need
- * to possibly update both ITR values
+ /* This block of logic allows us to get away with only updating
+ * one ITR value with each interrupt. The idea is to perform a
+ * pseudo-lazy update with the following criteria.
+ *
+ * 1. Rx is given higher priority than Tx if both are in same state
+ * 2. If we must reduce an ITR, that is given highest priority.
+ * 3. We then give priority to increasing ITR based on amount.
*/
- if (rx) {
- /* set the INTENA_MSK_MASK so that this first write
- * won't actually enable the interrupt, instead just
- * updating the ITR (it's bit 31 PF and VF)
+ if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+ /* Rx ITR needs to be reduced, this is highest priority */
+ intval = i40e_buildreg_itr(I40E_RX_ITR,
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+ ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+ (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+ /* Tx ITR needs to be reduced, this is second priority
+ * Tx ITR needs to be increased more than Rx, fourth priority
*/
- rxval |= BIT(31);
- /* don't check _DOWN because interrupt isn't being enabled */
- wr32(hw, INTREG(q_vector->reg_idx), rxval);
+ intval = i40e_buildreg_itr(I40E_TX_ITR,
+ q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+ /* Rx ITR needs to be increased, third priority */
+ intval = i40e_buildreg_itr(I40E_RX_ITR,
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else {
+ /* No ITR update, lowest priority */
+ intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+ if (q_vector->itr_countdown)
+ q_vector->itr_countdown--;
}
-enable_int:
if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- wr32(hw, INTREG(q_vector->reg_idx), txval);
-
- if (q_vector->itr_countdown)
- q_vector->itr_countdown--;
- else
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ wr32(hw, INTREG(q_vector->reg_idx), intval);
}
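As an illustration of the four-way priority above, here are a few hypothetical current/target pairs and the write each would produce (values in usecs, worked out against the branch conditions):

/* Hypothetical walk through the priority order in
 * i40e_update_enable_itr(), values in usecs:
 *
 *   rx 50 -> 20, tx 50 -> 50:   write Rx ITR (reduction, priority 1)
 *   rx 50 -> 50, tx 122 -> 60:  write Tx ITR (reduction, priority 2)
 *   rx 20 -> 50, tx 50 -> 50:   write Rx ITR (increase, priority 3)
 *   rx 20 -> 84, tx 20 -> 122:  write Tx ITR (larger increase, priority 4)
 *   no change on either side:   re-enable only, countdown decrements
 */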
/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 701b708628b0..f75a8fe68fcf 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -30,32 +30,37 @@
#include <net/xdp.h>
/* Interrupt Throttling and Rate Limiting Goodies */
-
-#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */
-#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */
-#define I40E_ITR_100K 0x0005
-#define I40E_ITR_50K 0x000A
-#define I40E_ITR_20K 0x0019
-#define I40E_ITR_18K 0x001B
-#define I40E_ITR_8K 0x003E
-#define I40E_ITR_4K 0x007A
-#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
- I40E_ITR_DYNAMIC)
-#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
- I40E_ITR_DYNAMIC)
-#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
-#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */
-#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */
#define I40E_DEFAULT_IRQ_WORK 256
-#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1)
-#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC))
-#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1)
+
+/* The datasheets for the X710 and XL710 indicate that the maximum value
+ * for the ITR is 8160 usec, which is then called out as 0xFF0 with a
+ * 2 usec resolution. 8160 is 0x1FE0 when written out in hex. So instead
+ * of storing the register value, which is divided by 2, let's use the
+ * actual values and avoid an excessive amount of translation.
+ */
+#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
+#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */
+#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */
+#define I40E_ITR_100K 10 /* all values below must be even */
+#define I40E_ITR_50K 20
+#define I40E_ITR_20K 50
+#define I40E_ITR_18K 60
+#define I40E_ITR_8K 122
+#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
+
+#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+
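Because the setting is now kept in microseconds, ITR_REG_ALIGN rounds a user value up to the register's 2-usec granularity before it is stored. A few hand-checked expectations, assuming the kernel's __ALIGN_MASK(x, mask) expands to (((x) + (mask)) & ~(mask)):

/* Hand-checked expectations for the usec-based macros above, assuming
 * __ALIGN_MASK(x, mask) == (((x) + (mask)) & ~(mask)).
 */
static void check_itr_macros(void)
{
	BUILD_BUG_ON(ITR_REG_ALIGN(5) != 6);	/* odd usecs round up */
	BUILD_BUG_ON(ITR_REG_ALIGN(8) != 8);	/* aligned values unchanged */
	BUILD_BUG_ON(ITR_TO_REG(I40E_ITR_RX_DEF) != I40E_ITR_20K);
	BUILD_BUG_ON(!ITR_IS_DYNAMIC(I40E_ITR_RX_DEF));
}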
/* 0x40 is the enable bit for interrupt rate limiting, and must be set if
* the value of the rate limit is non-zero
*/
#define INTRL_ENA BIT(6)
+#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
+
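The rate-limit register keeps a 4-usec resolution with INTRL_ENA as the enable bit, so encoding and decoding round-trip whenever the value is a multiple of 4; worked by hand from these macros (INTRL_USEC_TO_REG as shown in the i40evf header later in this patch):

/* Round-trip of the rate-limit encoding, worked by hand:
 *
 *   INTRL_USEC_TO_REG(40)   == (40 >> 2) | INTRL_ENA == 0x4A
 *   INTRL_REG_TO_USEC(0x4A) == (0x4A & ~INTRL_ENA) << 2 == 40
 */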
/**
* i40e_intrl_usec_to_reg - convert interrupt rate limit to register
* @intrl: interrupt rate limit to convert
@@ -382,8 +387,7 @@ struct i40e_ring {
* these values always store the USER setting, and must be converted
* before programming to a register.
*/
- u16 rx_itr_setting;
- u16 tx_itr_setting;
+ u16 itr_setting;
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
@@ -459,21 +463,21 @@ static inline void set_ring_xdp(struct i40e_ring *ring)
ring->flags |= I40E_TXR_FLAGS_XDP;
}
-enum i40e_latency_range {
- I40E_LOWEST_LATENCY = 0,
- I40E_LOW_LATENCY = 1,
- I40E_BULK_LATENCY = 2,
-};
+#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY 0x8000
+#define I40E_ITR_ADAPTIVE_BULK 0x0000
+#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))
struct i40e_ring_container {
- /* array of pointers to rings */
- struct i40e_ring *ring;
+ struct i40e_ring *ring; /* pointer to linked list of ring(s) */
+ unsigned long next_update; /* jiffies value of next update */
unsigned int total_bytes; /* total bytes processed this int */
unsigned int total_packets; /* total packets processed this int */
- unsigned long last_itr_update; /* jiffies of last ITR update */
u16 count;
- enum i40e_latency_range latency_range;
- u16 itr;
+ u16 target_itr; /* target ITR setting for ring(s) */
+ u16 current_itr; /* current ITR setting for ring(s) */
};
/* iterator for handling rings in ring container */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 357d6051281f..eb8f3e327f6b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -392,99 +392,241 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
val);
}
+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
+ struct i40e_ring_container *rc)
+{
+ return &q_vector->rx == rc;
+}
+
+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
+{
+ unsigned int divisor;
+
+ switch (q_vector->adapter->link_speed) {
+ case I40E_LINK_SPEED_40GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
+ break;
+ case I40E_LINK_SPEED_25GB:
+ case I40E_LINK_SPEED_20GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
+ break;
+ default:
+ case I40E_LINK_SPEED_10GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
+ break;
+ case I40E_LINK_SPEED_1GB:
+ case I40E_LINK_SPEED_100MB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
+ break;
+ }
+
+ return divisor;
+}
+
/**
- * i40e_set_new_dynamic_itr - Find new ITR level
+ * i40e_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
* @rc: structure containing ring performance data
*
- * Returns true if ITR changed, false if not
- *
- * Stores a new ITR value based on packets and byte counts during
- * the last interrupt. The advantage of per interrupt computation
- * is faster updates and more accurate ITR for the current traffic
- * pattern. Constants in this function were computed based on
- * theoretical maximum wire speed and thresholds were set based on
- * testing data as well as attempting to minimize response time
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt. The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
* while increasing bulk throughput.
**/
-static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+static void i40e_update_itr(struct i40e_q_vector *q_vector,
+ struct i40e_ring_container *rc)
{
- enum i40e_latency_range new_latency_range = rc->latency_range;
- u32 new_itr = rc->itr;
- int bytes_per_usec;
- unsigned int usecs, estimated_usecs;
+ unsigned int avg_wire_size, packets, bytes, itr;
+ unsigned long next_update = jiffies;
- if (rc->total_packets == 0 || !rc->itr)
- return false;
+ /* If we don't have any rings just leave ourselves set for maximum
+ * possible latency so we take ourselves out of the equation.
+ */
+ if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
+ return;
+
+ /* For Rx we want to push the delay up and default to low latency.
+ * for Tx we want to pull the delay down and default to high latency.
+ */
+ itr = i40e_container_is_rx(q_vector, rc) ?
+ I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
+ I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
+
+ /* If we didn't update within the last 1 - 2 jiffies we can
+ * assume that either packets are coming in so slowly there
+ * hasn't been any work, or that there is so much work that NAPI
+ * is dealing with interrupt moderation and we don't need to do
+ * anything.
+ */
+ if (time_after(next_update, rc->next_update))
+ goto clear_counts;
+
+ /* If itr_countdown is set it means we programmed an ITR within
+ * the last 4 interrupt cycles. This has a side effect of us
+ * potentially firing an early interrupt. In order to work around
+ * this we need to throw out any data received for a few
+ * interrupts following the update.
+ */
+ if (q_vector->itr_countdown) {
+ itr = rc->target_itr;
+ goto clear_counts;
+ }
+
+ packets = rc->total_packets;
+ bytes = rc->total_bytes;
- usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
- bytes_per_usec = rc->total_bytes / usecs;
+ if (i40e_container_is_rx(q_vector, rc)) {
+ /* If this is Rx and there are 1 to 4 packets and fewer than
+ * 9000 bytes, assume insufficient data to use the bulk rate
+ * limiting approach unless Tx is already in bulk rate limiting.
+ * We are likely latency driven.
+ */
+ if (packets && packets < 4 && bytes < 9000 &&
+ (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
+ itr = I40E_ITR_ADAPTIVE_LATENCY;
+ goto adjust_by_size;
+ }
+ } else if (packets < 4) {
+ /* If both Tx and Rx ITR are maxed out, Tx ITR is running in
+ * bulk mode, and we are receiving 4 or fewer packets, just reset
+ * the ITR_ADAPTIVE_LATENCY bit for latency mode so that the Rx
+ * side can relax.
+ */
+ if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
+ (q_vector->rx.target_itr & I40E_ITR_MASK) ==
+ I40E_ITR_ADAPTIVE_MAX_USECS)
+ goto clear_counts;
+ } else if (packets > 32) {
+ /* If we have processed over 32 packets in a single interrupt
+ * for Tx assume we need to switch over to "bulk" mode.
+ */
+ rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
+ }
- /* The calculations in this algorithm depend on interrupts actually
- * firing at the ITR rate. This may not happen if the packet rate is
- * really low, or if we've been napi polling. Check to make sure
- * that's not the case before we continue.
+ /* We have no packets to actually measure against. This means
+ * either one of the other queues on this vector is active or
+ * we are a Tx queue doing TSO with too high of an interrupt rate.
+ *
+ * Between 4 and 56 we can assume that our current interrupt delay
+ * is only slightly too low. As such we should increase it by a small
+ * fixed amount.
*/
- estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);
- if (estimated_usecs > usecs) {
- new_latency_range = I40E_LOW_LATENCY;
- goto reset_latency;
+ if (packets < 56) {
+ itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
+ if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= I40E_ITR_ADAPTIVE_LATENCY;
+ itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+ }
+ goto clear_counts;
+ }
+
+ if (packets <= 256) {
+ itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+ itr &= I40E_ITR_MASK;
+
+ /* Between 56 and 112 is our "goldilocks" zone where we are
+ * working out "just right". Just report that our current
+ * ITR is good for us.
+ */
+ if (packets <= 112)
+ goto clear_counts;
+
+ /* If packet count is 128 or greater we are likely looking
+ * at a slight overrun of the delay we want. Try halving
+ * our delay to see if that will cut the number of packets
+ * in half per interrupt.
+ */
+ itr /= 2;
+ itr &= I40E_ITR_MASK;
+ if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
+ itr = I40E_ITR_ADAPTIVE_MIN_USECS;
+
+ goto clear_counts;
}
- /* simple throttlerate management
- * 0-10MB/s lowest (50000 ints/s)
- * 10-20MB/s low (20000 ints/s)
- * 20-1249MB/s bulk (18000 ints/s)
+ /* The paths below assume we are dealing with a bulk ITR since
+ * number of packets is greater than 256. We are just going to have
+ * to compute a value and try to bring the count under control,
+ * though for smaller packet sizes there isn't much we can do as
+ * NAPI polling will likely be kicking in sooner rather than later.
+ */
+ itr = I40E_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+ /* If packet counts are 256 or greater we can assume we have a gross
+ * overestimation of what the rate should be. Instead of trying to
+ * fine tune it, just use the formula below to try and dial in an
+ * exact value given the current packet size of the frame.
+ */
+ avg_wire_size = bytes / packets;
+
+ /* The following is a crude approximation of:
+ * wmem_default / (size + overhead) = desired_pkts_per_int
+ * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+ * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
*
- * The math works out because the divisor is in 10^(-6) which
- * turns the bytes/us input value into MB/s values, but
- * make sure to use usecs, as the register values written
- * are in 2 usec increments in the ITR registers, and make sure
- * to use the smoothed values that the countdown timer gives us.
+ * Assuming wmem_default is 212992 and overhead is 640 bytes per
+ * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+ * formula down to
+ *
+ * (170 * (size + 24)) / (size + 640) = ITR
+ *
+ * We first do some math on the packet size and then finally bitshift
+ * by 8 after rounding up. We also have to account for PCIe link speed
+ * difference as ITR scales based on this.
*/
- switch (new_latency_range) {
- case I40E_LOWEST_LATENCY:
- if (bytes_per_usec > 10)
- new_latency_range = I40E_LOW_LATENCY;
- break;
- case I40E_LOW_LATENCY:
- if (bytes_per_usec > 20)
- new_latency_range = I40E_BULK_LATENCY;
- else if (bytes_per_usec <= 10)
- new_latency_range = I40E_LOWEST_LATENCY;
- break;
- case I40E_BULK_LATENCY:
- default:
- if (bytes_per_usec <= 20)
- new_latency_range = I40E_LOW_LATENCY;
- break;
+ if (avg_wire_size <= 60) {
+ /* Start at 250k ints/sec */
+ avg_wire_size = 4096;
+ } else if (avg_wire_size <= 380) {
+ /* 250K ints/sec to 60K ints/sec */
+ avg_wire_size *= 40;
+ avg_wire_size += 1696;
+ } else if (avg_wire_size <= 1084) {
+ /* 60K ints/sec to 36K ints/sec */
+ avg_wire_size *= 15;
+ avg_wire_size += 11452;
+ } else if (avg_wire_size <= 1980) {
+ /* 36K ints/sec to 30K ints/sec */
+ avg_wire_size *= 5;
+ avg_wire_size += 22420;
+ } else {
+ /* plateau at a limit of 30K ints/sec */
+ avg_wire_size = 32256;
}
-reset_latency:
- rc->latency_range = new_latency_range;
+ /* If we are in low latency mode halve our delay which doubles the
+ * rate to somewhere between 100K and 16K ints/sec
+ */
+ if (itr & I40E_ITR_ADAPTIVE_LATENCY)
+ avg_wire_size /= 2;
- switch (new_latency_range) {
- case I40E_LOWEST_LATENCY:
- new_itr = I40E_ITR_50K;
- break;
- case I40E_LOW_LATENCY:
- new_itr = I40E_ITR_20K;
- break;
- case I40E_BULK_LATENCY:
- new_itr = I40E_ITR_18K;
- break;
- default:
- break;
+ /* Resultant value is 256 times larger than it needs to be. This
+ * gives us room to adjust the value as needed to either increase
+ * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+ *
+ * Use addition as we have already recorded the new latency flag
+ * for the ITR value.
+ */
+ itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
+ I40E_ITR_ADAPTIVE_MIN_INC;
+
+ if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= I40E_ITR_ADAPTIVE_LATENCY;
+ itr += I40E_ITR_ADAPTIVE_MAX_USECS;
}
+clear_counts:
+ /* write back value */
+ rc->target_itr = itr;
+
+ /* next update should occur within next jiffy */
+ rc->next_update = next_update + 1;
+
rc->total_bytes = 0;
rc->total_packets = 0;
- rc->last_itr_update = jiffies;
-
- if (new_itr != rc->itr) {
- rc->itr = new_itr;
- return true;
- }
- return false;
}
/**
@@ -1273,7 +1415,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
* @rx_buffer: rx buffer to pull data from
*
* This function will clean up the contents of the rx_buffer. It will
- * either recycle the bufer or unmap it and free the associated resources.
+ * either recycle the buffer or unmap it and free the associated resources.
*/
static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer)
@@ -1457,33 +1599,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
return failure ? budget : (int)total_rx_packets;
}
-static u32 i40e_buildreg_itr(const int type, const u16 itr)
+static inline u32 i40e_buildreg_itr(const int type, u16 itr)
{
u32 val;
+ /* We don't bother with setting the CLEARPBA bit as the data sheet
+ * points out doing so is "meaningless since it was already
+ * auto-cleared". The auto-clearing happens when the interrupt is
+ * asserted.
+ *
+ * Hardware errata 28 also indicates that writing to a
+ * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
+ * an event in the PBA anyway, so we need to rely on the automask
+ * to hold pending events for us until the interrupt is re-enabled.
+ *
+ * The itr value is reported in microseconds, and the register
+ * value is recorded in 2 microsecond units. For this reason we
+ * only need to shift by the interval shift - 1 instead of the
+ * full value.
+ */
+ itr &= I40E_ITR_MASK;
+
val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
- I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK |
(type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
- (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT);
+ (itr << (I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1));
return val;
}
/* a small macro to shorten up some long lines */
#define INTREG I40E_VFINT_DYN_CTLN1
-static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
-{
- struct i40evf_adapter *adapter = vsi->back;
- return adapter->rx_rings[idx].rx_itr_setting;
-}
-
-static inline int get_tx_itr(struct i40e_vsi *vsi, int idx)
-{
- struct i40evf_adapter *adapter = vsi->back;
-
- return adapter->tx_rings[idx].tx_itr_setting;
-}
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
/**
* i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -1495,70 +1649,51 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
struct i40e_q_vector *q_vector)
{
struct i40e_hw *hw = &vsi->back->hw;
- bool rx = false, tx = false;
- u32 rxval, txval;
- int idx = q_vector->v_idx;
- int rx_itr_setting, tx_itr_setting;
-
- /* avoid dynamic calculation if in countdown mode OR if
- * all dynamic is disabled
- */
- rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
-
- rx_itr_setting = get_rx_itr(vsi, idx);
- tx_itr_setting = get_tx_itr(vsi, idx);
+ u32 intval;
- if (q_vector->itr_countdown > 0 ||
- (!ITR_IS_DYNAMIC(rx_itr_setting) &&
- !ITR_IS_DYNAMIC(tx_itr_setting))) {
- goto enable_int;
- }
-
- if (ITR_IS_DYNAMIC(rx_itr_setting)) {
- rx = i40e_set_new_dynamic_itr(&q_vector->rx);
- rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
- }
+ /* These will do nothing if dynamic updates are not enabled */
+ i40e_update_itr(q_vector, &q_vector->tx);
+ i40e_update_itr(q_vector, &q_vector->rx);
- if (ITR_IS_DYNAMIC(tx_itr_setting)) {
- tx = i40e_set_new_dynamic_itr(&q_vector->tx);
- txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
- }
-
- if (rx || tx) {
- /* get the higher of the two ITR adjustments and
- * use the same value for both ITR registers
- * when in adaptive mode (Rx and/or Tx)
- */
- u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
-
- q_vector->tx.itr = q_vector->rx.itr = itr;
- txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
- tx = true;
- rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
- rx = true;
- }
-
- /* only need to enable the interrupt once, but need
- * to possibly update both ITR values
+ /* This block of logic allows us to get away with only updating
+ * one ITR value with each interrupt. The idea is to perform a
+ * pseudo-lazy update with the following criteria.
+ *
+ * 1. Rx is given higher priority than Tx if both are in same state
+ * 2. If we must reduce an ITR, that is given highest priority.
+ * 3. We then give priority to increasing ITR based on amount.
*/
- if (rx) {
- /* set the INTENA_MSK_MASK so that this first write
- * won't actually enable the interrupt, instead just
- * updating the ITR (it's bit 31 PF and VF)
+ if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+ /* Rx ITR needs to be reduced, this is highest priority */
+ intval = i40e_buildreg_itr(I40E_RX_ITR,
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+ ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+ (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+ /* Tx ITR needs to be reduced, this is second priority
+ * Tx ITR needs to be increased more than Rx, fourth priority
*/
- rxval |= BIT(31);
- /* don't check _DOWN because interrupt isn't being enabled */
- wr32(hw, INTREG(q_vector->reg_idx), rxval);
+ intval = i40e_buildreg_itr(I40E_TX_ITR,
+ q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+ /* Rx ITR needs to be increased, third priority */
+ intval = i40e_buildreg_itr(I40E_RX_ITR,
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else {
+ /* No ITR update, lowest priority */
+ intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+ if (q_vector->itr_countdown)
+ q_vector->itr_countdown--;
}
-enable_int:
if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- wr32(hw, INTREG(q_vector->reg_idx), txval);
-
- if (q_vector->itr_countdown)
- q_vector->itr_countdown--;
- else
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ wr32(hw, INTREG(q_vector->reg_idx), intval);
}
/**
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 7798a6645c3f..9129447d079b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -28,31 +28,35 @@
#define _I40E_TXRX_H_
/* Interrupt Throttling and Rate Limiting Goodies */
-
-#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */
-#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */
-#define I40E_ITR_100K 0x0005
-#define I40E_ITR_50K 0x000A
-#define I40E_ITR_20K 0x0019
-#define I40E_ITR_18K 0x001B
-#define I40E_ITR_8K 0x003E
-#define I40E_ITR_4K 0x007A
-#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
- I40E_ITR_DYNAMIC)
-#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
- I40E_ITR_DYNAMIC)
-#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
-#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */
-#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */
#define I40E_DEFAULT_IRQ_WORK 256
-#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1)
-#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC))
-#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1)
+
+/* The datasheets for the X710 and XL710 indicate that the maximum value
+ * for the ITR is 8160 usec, which is then called out as 0xFF0 with a
+ * 2 usec resolution. 8160 is 0x1FE0 when written out in hex. So instead
+ * of storing the register value, which is divided by 2, let's use the
+ * actual values and avoid an excessive amount of translation.
+ */
+#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
+#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */
+#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */
+#define I40E_ITR_100K 10 /* all values below must be even */
+#define I40E_ITR_50K 20
+#define I40E_ITR_20K 50
+#define I40E_ITR_18K 60
+#define I40E_ITR_8K 122
+#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
+
+#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+
/* 0x40 is the enable bit for interrupt rate limiting, and must be set if
* the value of the rate limit is non-zero
*/
#define INTRL_ENA BIT(6)
+#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
#define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0)
#define I40E_INTRL_8K 125 /* 8000 ints/sec */
@@ -362,8 +366,7 @@ struct i40e_ring {
* these values always store the USER setting, and must be converted
* before programming to a register.
*/
- u16 rx_itr_setting;
- u16 tx_itr_setting;
+ u16 itr_setting;
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
@@ -425,21 +428,21 @@ static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
-enum i40e_latency_range {
- I40E_LOWEST_LATENCY = 0,
- I40E_LOW_LATENCY = 1,
- I40E_BULK_LATENCY = 2,
-};
+#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY 0x8000
+#define I40E_ITR_ADAPTIVE_BULK 0x0000
+#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))
struct i40e_ring_container {
- /* array of pointers to rings */
- struct i40e_ring *ring;
+ struct i40e_ring *ring; /* pointer to linked list of ring(s) */
+ unsigned long next_update; /* jiffies value of next update */
unsigned int total_bytes; /* total bytes processed this int */
unsigned int total_packets; /* total packets processed this int */
- unsigned long last_itr_update; /* jiffies of last ITR update */
u16 count;
- enum i40e_latency_range latency_range;
- u16 itr;
+ u16 target_itr; /* target ITR setting for ring(s) */
+ u16 current_itr; /* current ITR setting for ring(s) */
};
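The target_itr/current_itr split, together with the adaptive defines above, lets the adaptive algorithm publish a new value and leave the register write to the interrupt enable path, which only acts when the two fields diverge. A hedged userspace sketch of that contract; ring_container and itr_update_pending are illustrative stand-ins, not driver code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define I40E_ITR_ADAPTIVE_LATENCY	0x8000
#define ITR_IS_BULK(x)	(!((x) & I40E_ITR_ADAPTIVE_LATENCY))

struct ring_container {		/* stand-in for i40e_ring_container */
	uint16_t target_itr;	/* what the adaptive algorithm wants */
	uint16_t current_itr;	/* what the hardware was last told */
};

/* the enable path reprograms the register only when it lags behind */
static bool itr_update_pending(const struct ring_container *rc)
{
	return rc->current_itr != rc->target_itr;
}

int main(void)
{
	struct ring_container rx = { .target_itr = 50, .current_itr = 50 };

	rx.target_itr = 122;	/* no latency bit set, so a bulk value */
	printf("update pending: %d, bulk: %d\n",
	       itr_update_pending(&rx), ITR_IS_BULK(rx.target_itr));
	return 0;
}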
/* iterator for handling rings in ring container */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 9690c1ea019e..b6991e8014d8 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -117,9 +117,8 @@ struct i40e_q_vector {
struct i40e_ring_container rx;
struct i40e_ring_container tx;
u32 ring_mask;
+ u8 itr_countdown; /* when 0 should adjust adaptive ITR */
u8 num_ringpairs; /* total number of ring pairs in vector */
-#define ITR_COUNTDOWN_START 100
- u8 itr_countdown; /* when 0 or 1 update ITR */
u16 v_idx; /* index in the vsi->q_vector array. */
u16 reg_idx; /* register index of the interrupt */
char name[IFNAMSIZ + 15];
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index e2d8aa19d205..aded3ad7763e 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -457,14 +457,14 @@ static int __i40evf_get_coalesce(struct net_device *netdev,
rx_ring = &adapter->rx_rings[queue];
tx_ring = &adapter->tx_rings[queue];
- if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
+ if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
ec->use_adaptive_rx_coalesce = 1;
- if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
+ if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
ec->use_adaptive_tx_coalesce = 1;
- ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
- ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+ ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
+ ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
return 0;
}
@@ -502,7 +502,7 @@ static int i40evf_get_per_queue_coalesce(struct net_device *netdev,
/**
* i40evf_set_itr_per_queue - set ITR values for specific queue
- * @vsi: the VSI to set values for
+ * @adapter: the VF adapter struct to set values for
* @ec: coalesce settings from ethtool
* @queue: the queue to modify
*
@@ -514,33 +514,29 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
{
struct i40e_ring *rx_ring = &adapter->rx_rings[queue];
struct i40e_ring *tx_ring = &adapter->tx_rings[queue];
- struct i40e_vsi *vsi = &adapter->vsi;
- struct i40e_hw *hw = &adapter->hw;
struct i40e_q_vector *q_vector;
- u16 vector;
- rx_ring->rx_itr_setting = ec->rx_coalesce_usecs;
- tx_ring->tx_itr_setting = ec->tx_coalesce_usecs;
+ rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+ tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
- rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC;
+ rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
if (!ec->use_adaptive_rx_coalesce)
- rx_ring->rx_itr_setting ^= I40E_ITR_DYNAMIC;
+ rx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
- tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC;
+ tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
if (!ec->use_adaptive_tx_coalesce)
- tx_ring->tx_itr_setting ^= I40E_ITR_DYNAMIC;
+ tx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
q_vector = rx_ring->q_vector;
- q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
- vector = vsi->base_vector + q_vector->v_idx;
- wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
+ q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
q_vector = tx_ring->q_vector;
- q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
- vector = vsi->base_vector + q_vector->v_idx;
- wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
+ q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
- i40e_flush(hw);
+ /* The interrupt handler itself will take care of programming
+ * the Tx and Rx ITR values based on the values we have entered
+ * into the q_vector; there is no need to write the values now.
+ */
}
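The |= followed by a conditional ^= in the function above is just a branch-light way of setting the dynamic bit only when adaptive coalescing is requested. A sketch of the equivalent straightforward form, under the same defines; build_itr_setting is a hypothetical helper, not a driver function:

#include <stdint.h>

#define I40E_ITR_DYNAMIC	0x8000u
#define I40E_ITR_MASK		0x1FFEu
#define __ALIGN_MASK(x, mask)	(((x) + (mask)) & ~(mask))
#define ITR_REG_ALIGN(setting)	__ALIGN_MASK(setting, ~I40E_ITR_MASK)

/* hypothetical helper: same result as the |= / conditional ^= pair */
static uint16_t build_itr_setting(uint32_t usecs, int use_adaptive)
{
	uint16_t itr = (uint16_t)ITR_REG_ALIGN(usecs);

	if (use_adaptive)
		itr |= I40E_ITR_DYNAMIC;
	return itr;
}

int main(void)
{
	/* 51 usec aligns up to 52; adaptive on sets bit 15 -> 0x8034 */
	return build_itr_setting(51, 1) == 0x8034u ? 0 : 1;
}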
/**
@@ -565,8 +561,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev,
if (ec->rx_coalesce_usecs == 0) {
if (ec->use_adaptive_rx_coalesce)
netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
- } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
- (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+ } else if ((ec->rx_coalesce_usecs < I40E_MIN_ITR) ||
+ (ec->rx_coalesce_usecs > I40E_MAX_ITR)) {
netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
return -EINVAL;
}
@@ -575,8 +571,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev,
if (ec->tx_coalesce_usecs == 0) {
if (ec->use_adaptive_tx_coalesce)
netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
- } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
- (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+ } else if ((ec->tx_coalesce_usecs < I40E_MIN_ITR) ||
+ (ec->tx_coalesce_usecs > I40E_MAX_ITR)) {
netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
return -EINVAL;
}
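Since the defines are now in usec, the bounds check compares what the user typed (e.g. via ethtool -C <iface> rx-usecs 50) directly against the datasheet limits, with 0 reserved as the explicit disable case. A small sketch mirroring that validation:

#include <stdbool.h>

#define I40E_MIN_ITR	2	/* reg uses 2 usec resolution */
#define I40E_MAX_ITR	8160	/* maximum value as per datasheet */

/* mirrors the checks above: 0 means disable, anything else must sit
 * inside the window the hardware can express
 */
static bool coalesce_usecs_valid(unsigned int usecs)
{
	return usecs == 0 ||
	       (usecs >= I40E_MIN_ITR && usecs <= I40E_MAX_ITR);
}

int main(void)
{
	return (coalesce_usecs_valid(50) && !coalesce_usecs_valid(9000))
		? 0 : 1;
}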
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 16989ad2ca90..6fd09926181a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -353,11 +353,12 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
rx_ring->vsi = &adapter->vsi;
q_vector->rx.ring = rx_ring;
q_vector->rx.count++;
- q_vector->rx.latency_range = I40E_LOW_LATENCY;
- q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
+ q_vector->rx.next_update = jiffies + 1;
+ q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
q_vector->ring_mask |= BIT(r_idx);
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
- wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr);
+ wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx),
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
}
/**
@@ -378,11 +379,12 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
tx_ring->vsi = &adapter->vsi;
q_vector->tx.ring = tx_ring;
q_vector->tx.count++;
- q_vector->tx.latency_range = I40E_LOW_LATENCY;
- q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ q_vector->tx.next_update = jiffies + 1;
+ q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
q_vector->num_ringpairs++;
- wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr);
+ wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx),
+ q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
}
/**
@@ -1169,7 +1171,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
tx_ring->netdev = adapter->netdev;
tx_ring->dev = &adapter->pdev->dev;
tx_ring->count = adapter->tx_desc_count;
- tx_ring->tx_itr_setting = I40E_ITR_TX_DEF;
+ tx_ring->itr_setting = I40E_ITR_TX_DEF;
if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE)
tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
@@ -1178,7 +1180,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
rx_ring->netdev = adapter->netdev;
rx_ring->dev = &adapter->pdev->dev;
rx_ring->count = adapter->rx_desc_count;
- rx_ring->rx_itr_setting = I40E_ITR_RX_DEF;
+ rx_ring->itr_setting = I40E_ITR_RX_DEF;
}
adapter->num_active_queues = num_active_queues;
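Plugging the new defines together shows what the defaults assigned above actually mean; a standalone sketch (not driver code) that decodes I40E_ITR_RX_DEF:

#include <stdio.h>

#define I40E_ITR_DYNAMIC	0x8000
#define I40E_ITR_20K		50
#define I40E_ITR_RX_DEF		(I40E_ITR_20K | I40E_ITR_DYNAMIC)
#define ITR_TO_REG(setting)	((setting) & ~I40E_ITR_DYNAMIC)
#define ITR_IS_DYNAMIC(setting)	(!!((setting) & I40E_ITR_DYNAMIC))

int main(void)
{
	/* prints dynamic=1 usecs=50: adaptive mode, starting at
	 * 50 usec (~20K ints/sec)
	 */
	printf("dynamic=%d usecs=%d\n",
	       ITR_IS_DYNAMIC(I40E_ITR_RX_DEF), ITR_TO_REG(I40E_ITR_RX_DEF));
	return 0;
}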
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 50ce0d6c09ef..d57a67285505 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -344,6 +344,7 @@ void i40evf_disable_queues(struct i40evf_adapter *adapter)
void i40evf_map_queues(struct i40evf_adapter *adapter)
{
struct virtchnl_irq_map_info *vimi;
+ struct virtchnl_vector_map *vecmap;
int v_idx, q_vectors, len;
struct i40e_q_vector *q_vector;
@@ -367,17 +368,22 @@ void i40evf_map_queues(struct i40evf_adapter *adapter)
vimi->num_vectors = adapter->num_msix_vectors;
/* Queue vectors first */
for (v_idx = 0; v_idx < q_vectors; v_idx++) {
- q_vector = adapter->q_vectors + v_idx;
- vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id;
- vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS;
- vimi->vecmap[v_idx].txq_map = q_vector->ring_mask;
- vimi->vecmap[v_idx].rxq_map = q_vector->ring_mask;
+ q_vector = &adapter->q_vectors[v_idx];
+ vecmap = &vimi->vecmap[v_idx];
+
+ vecmap->vsi_id = adapter->vsi_res->vsi_id;
+ vecmap->vector_id = v_idx + NONQ_VECS;
+ vecmap->txq_map = q_vector->ring_mask;
+ vecmap->rxq_map = q_vector->ring_mask;
+ vecmap->rxitr_idx = I40E_RX_ITR;
+ vecmap->txitr_idx = I40E_TX_ITR;
}
/* Misc vector last - this is only for AdminQ messages */
- vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id;
- vimi->vecmap[v_idx].vector_id = 0;
- vimi->vecmap[v_idx].txq_map = 0;
- vimi->vecmap[v_idx].rxq_map = 0;
+ vecmap = &vimi->vecmap[v_idx];
+ vecmap->vsi_id = adapter->vsi_res->vsi_id;
+ vecmap->vector_id = 0;
+ vecmap->txq_map = 0;
+ vecmap->rxq_map = 0;
adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS;
i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP,