Diffstat (limited to 'drivers/net/ethernet/intel')
-rw-r--r--	drivers/net/ethernet/intel/ice/ice.h	1
-rw-r--r--	drivers/net/ethernet/intel/ice/ice_txrx.c	292
-rw-r--r--	drivers/net/ethernet/intel/ice/ice_txrx.h	6
3 files changed, 275 insertions, 24 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 0843868d1bbd..7609cccb251e 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -307,6 +307,7 @@ struct ice_q_vector {
* value to the device
*/
u8 intrl;
+ u8 itr_countdown; /* when 0 should adjust adaptive ITR */
} ____cacheline_internodealigned_in_smp;
enum ice_pf_flags {
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index ea4ec3760f8b..dfd7fa06ed22 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1097,18 +1097,257 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
return failure ? budget : (int)total_rx_pkts;
}
+static unsigned int ice_itr_divisor(struct ice_port_info *pi)
+{
+ switch (pi->phy.link_info.link_speed) {
+ case ICE_AQ_LINK_SPEED_40GB:
+ return ICE_ITR_ADAPTIVE_MIN_INC * 1024;
+ case ICE_AQ_LINK_SPEED_25GB:
+ case ICE_AQ_LINK_SPEED_20GB:
+ return ICE_ITR_ADAPTIVE_MIN_INC * 512;
+ case ICE_AQ_LINK_SPEED_100MB:
+ return ICE_ITR_ADAPTIVE_MIN_INC * 32;
+ default:
+ return ICE_ITR_ADAPTIVE_MIN_INC * 256;
+ }
+}
+
+/**
+ * ice_update_itr - update the adaptive ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
+ * @rc: structure containing ring performance data
+ *
+ * Stores a new ITR value based on packet and byte
+ * counts during the last interrupt. The advantage of per-interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ */
+static void
+ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc)
+{
+ unsigned int avg_wire_size, packets, bytes, itr;
+ unsigned long next_update = jiffies;
+ bool container_is_rx;
+
+ if (!rc->ring || !ITR_IS_DYNAMIC(rc->itr_setting))
+ return;
+
+ /* If itr_countdown is set it means we programmed an ITR within
+ * the last few interrupt cycles. This has a side effect of us
+ * potentially firing an early interrupt. In order to work around
+ * this we need to throw out any data received for a few
+ * interrupts following the update.
+ */
+ if (q_vector->itr_countdown) {
+ itr = rc->target_itr;
+ goto clear_counts;
+ }
+
+ container_is_rx = (&q_vector->rx == rc);
+ /* For Rx we want to push the delay up and default to low latency.
+ * For Tx we want to pull the delay down and default to high latency.
+ */
+ itr = container_is_rx ?
+ ICE_ITR_ADAPTIVE_MIN_USECS | ICE_ITR_ADAPTIVE_LATENCY :
+ ICE_ITR_ADAPTIVE_MAX_USECS | ICE_ITR_ADAPTIVE_LATENCY;
+
+ /* If we haven't updated within the last 1 - 2 jiffies we can assume
+ * that either packets are coming in so slowly there hasn't been any
+ * work, or that there is so much work that NAPI is handling the
+ * interrupt moderation for us and we don't need to do anything.
+ */
+ if (time_after(next_update, rc->next_update))
+ goto clear_counts;
+
+ packets = rc->total_pkts;
+ bytes = rc->total_bytes;
+
+ if (container_is_rx) {
+ /* If this is Rx, there are 1 to 3 packets, and bytes are less
+ * than 9000, assume there is insufficient data to use the bulk
+ * rate limiting approach unless Tx is already in bulk rate
+ * limiting. We are likely latency driven.
+ */
+ if (packets && packets < 4 && bytes < 9000 &&
+ (q_vector->tx.target_itr & ICE_ITR_ADAPTIVE_LATENCY)) {
+ itr = ICE_ITR_ADAPTIVE_LATENCY;
+ goto adjust_by_size;
+ }
+ } else if (packets < 4) {
+ /* If both Tx and Rx ITR are maxed out, Tx is running in
+ * bulk mode, and we processed fewer than 4 packets, keep
+ * the default ITR_ADAPTIVE_LATENCY setting from above so
+ * that the Rx side can relax out of bulk mode.
+ */
+ if (rc->target_itr == ICE_ITR_ADAPTIVE_MAX_USECS &&
+ (q_vector->rx.target_itr & ICE_ITR_MASK) ==
+ ICE_ITR_ADAPTIVE_MAX_USECS)
+ goto clear_counts;
+ } else if (packets > 32) {
+ /* If we have processed over 32 packets in a single interrupt
+ * for Tx assume we need to switch over to "bulk" mode.
+ */
+ rc->target_itr &= ~ICE_ITR_ADAPTIVE_LATENCY;
+ }
+
+ /* If we have no packets to measure against, then either one of
+ * the other queues on this vector is active or we are a Tx queue
+ * doing TSO with too high of an interrupt rate.
+ *
+ * Either way, or with between 4 and 56 packets, we can assume the
+ * current interrupt delay is only slightly too low, so increase it
+ * by a small fixed amount.
+ */
+ if (packets < 56) {
+ itr = rc->target_itr + ICE_ITR_ADAPTIVE_MIN_INC;
+ if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= ICE_ITR_ADAPTIVE_LATENCY;
+ itr += ICE_ITR_ADAPTIVE_MAX_USECS;
+ }
+ goto clear_counts;
+ }
+
+ if (packets <= 256) {
+ itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+ itr &= ICE_ITR_MASK;
+
+ /* Between 56 and 112 is our "goldilocks" zone where we are
+ * working out "just right". Just report that our current
+ * ITR is good for us.
+ */
+ if (packets <= 112)
+ goto clear_counts;
+
+ /* Above 112 packets we are likely looking at a slight
+ * overrun of the delay we want. Try halving our delay to
+ * see if that will cut the number of packets in half per
+ * interrupt.
+ */
+ itr >>= 1;
+ itr &= ICE_ITR_MASK;
+ if (itr < ICE_ITR_ADAPTIVE_MIN_USECS)
+ itr = ICE_ITR_ADAPTIVE_MIN_USECS;
+
+ goto clear_counts;
+ }
+
+ /* The paths below assume we are dealing with a bulk ITR since
+ * number of packets is greater than 256. We are just going to have
+ * to compute a value and try to bring the count under control,
+ * though for smaller packet sizes there isn't much we can do as
+ * NAPI polling will likely be kicking in sooner rather than later.
+ */
+ itr = ICE_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+ /* If packet counts are 256 or greater we can assume we have a gross
+ * overestimation of what the rate should be. Instead of trying to fine
+ * tune it just use the formula below to try and dial in an exact value
+ * gives the current packet size of the frame.
+ */
+ avg_wire_size = bytes / packets;
+
+ /* The following is a crude approximation of:
+ * wmem_default / (size + overhead) = desired_pkts_per_int
+ * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+ * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+ *
+ * Assuming wmem_default is 212992 and overhead is 640 bytes per
+ * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+ * formula down to
+ *
+ * (170 * (size + 24)) / (size + 640) = ITR
+ *
+ * We first do some math on the packet size and then finally bitshift
+ * by 8 after rounding up. We also have to account for PCIe link speed
+ * difference as ITR scales based on this.
+ */
+ if (avg_wire_size <= 60) {
+ /* Start at 250k ints/sec */
+ avg_wire_size = 4096;
+ } else if (avg_wire_size <= 380) {
+ /* 250K ints/sec to 60K ints/sec */
+ avg_wire_size *= 40;
+ avg_wire_size += 1696;
+ } else if (avg_wire_size <= 1084) {
+ /* 60K ints/sec to 36K ints/sec */
+ avg_wire_size *= 15;
+ avg_wire_size += 11452;
+ } else if (avg_wire_size <= 1980) {
+ /* 36K ints/sec to 30K ints/sec */
+ avg_wire_size *= 5;
+ avg_wire_size += 22420;
+ } else {
+ /* plateau at a limit of 30K ints/sec */
+ avg_wire_size = 32256;
+ }
+
+ /* If we are in low latency mode, halve our delay, which doubles the
+ * rate to somewhere between 100K and 16K ints/sec
+ */
+ if (itr & ICE_ITR_ADAPTIVE_LATENCY)
+ avg_wire_size >>= 1;
+
+ /* Resultant value is 256 times larger than it needs to be. This
+ * gives us room to adjust the value as needed to either increase
+ * or decrease the value based on link speed (40G, 25G/20G, 100MB, etc).
+ *
+ * Use addition as we have already recorded the new latency flag
+ * for the ITR value.
+ */
+ itr += DIV_ROUND_UP(avg_wire_size,
+ ice_itr_divisor(q_vector->vsi->port_info)) *
+ ICE_ITR_ADAPTIVE_MIN_INC;
+
+ if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= ICE_ITR_ADAPTIVE_LATENCY;
+ itr += ICE_ITR_ADAPTIVE_MAX_USECS;
+ }
+
+clear_counts:
+ /* write back value */
+ rc->target_itr = itr;
+
+ /* next update should occur within next jiffy */
+ rc->next_update = next_update + 1;
+
+ rc->total_bytes = 0;
+ rc->total_pkts = 0;
+}
+
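As a sanity check on the adjust_by_size math in ice_update_itr() above, here is a small user-space sketch of the same piecewise approximation. The constants are copied from this patch; approx_itr_usecs() and the standalone macros are illustrative only, not driver code.

	#include <stdio.h>

	#define ITR_ADAPTIVE_MIN_INC	0x0002
	#define ITR_DIVISOR_DEFAULT	(ITR_ADAPTIVE_MIN_INC * 256)	/* default case of ice_itr_divisor() */
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	/* Piecewise fit of (170 * (size + 24)) / (size + 640), scaled up by 256 */
	static unsigned int approx_itr_usecs(unsigned int avg_wire_size)
	{
		if (avg_wire_size <= 60)
			avg_wire_size = 4096;			/* ~250K ints/sec */
		else if (avg_wire_size <= 380)
			avg_wire_size = avg_wire_size * 40 + 1696;
		else if (avg_wire_size <= 1084)
			avg_wire_size = avg_wire_size * 15 + 11452;
		else if (avg_wire_size <= 1980)
			avg_wire_size = avg_wire_size * 5 + 22420;
		else
			avg_wire_size = 32256;			/* ~30K ints/sec plateau */

		/* Undo the 256x scaling; the divisor also folds in link speed */
		return DIV_ROUND_UP(avg_wire_size, ITR_DIVISOR_DEFAULT) *
		       ITR_ADAPTIVE_MIN_INC;
	}

	int main(void)
	{
		/* 1500-byte frames: 1500 * 5 + 22420 = 29920, divided (rounding
		 * up) by 512 gives 59, times 2 gives 118 usecs, roughly 8.5K
		 * interrupts per second at the default link-speed divisor.
		 */
		printf("%u usecs\n", approx_itr_usecs(1500));
		return 0;
	}

With the 40GB divisor (ICE_ITR_ADAPTIVE_MIN_INC * 1024) the same frame size works out to about 30 usecs, which is where the 36K-30K ints/sec figures in the comments come from; the driver then adds this result on top of the latency/bulk flag already recorded in itr and clamps it to ICE_ITR_ADAPTIVE_MAX_USECS.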
/**
* ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register
* @itr_idx: interrupt throttling index
- * @reg_itr: interrupt throttling value adjusted based on ITR granularity
+ * @itr: interrupt throttling value in usecs
*/
-static u32 ice_buildreg_itr(int itr_idx, u16 reg_itr)
+static u32 ice_buildreg_itr(int itr_idx, u16 itr)
{
+ /* The itr value is reported in microseconds, and the register value is
+ * recorded in 2 microsecond units. For this reason we only need to
+ * shift by GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this
+ * granularity as a shift instead of a division. The mask makes sure the
+ * ITR value is never odd so we don't accidentally write into the field
+ * prior to the ITR field.
+ */
+ itr &= ICE_ITR_MASK;
+
return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
(itr_idx << GLINT_DYN_CTL_ITR_INDX_S) |
- (reg_itr << GLINT_DYN_CTL_INTERVAL_S);
+ (itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S));
}
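A quick numeric illustration of the masking and granularity arithmetic above (user-space sketch; ICE_ITR_MASK comes from the patch, ICE_ITR_GRAN_S is taken as 1 to match the 2-usec units described in the comment, and INTERVAL_S is a placeholder field offset rather than the real GLINT_DYN_CTL layout):

	#include <stdio.h>

	#define ICE_ITR_MASK	0x1FFE	/* keeps the usec value even */
	#define ICE_ITR_GRAN_S	1	/* hardware counts 2-usec units */
	#define INTERVAL_S	5	/* illustrative field offset only */

	int main(void)
	{
		unsigned int itr = 51;	/* odd usec request from the adaptive code */

		itr &= ICE_ITR_MASK;	/* 51 -> 50: bit 0 cleared so nothing
					 * spills into the field below INTERVAL */

		/* 50 usecs is 25 two-usec units; the single combined shift by
		 * INTERVAL_S - ICE_ITR_GRAN_S both converts to register units
		 * and positions the value in the INTERVAL field.
		 */
		printf("interval units = %u, field bits = 0x%x\n",
		       itr >> ICE_ITR_GRAN_S, itr << (INTERVAL_S - ICE_ITR_GRAN_S));
		return 0;
	}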
+/* The act of updating the ITR will cause the interrupt to fire immediately.
+ * In order to prevent this from throwing off the adaptive update statistics
+ * we defer the update so that it can only happen every so often. So after
+ * either Tx or Rx is updated we make the adaptive scheme wait until either
+ * the ITR completely expires via the next_update expiration or we have been
+ * through at least 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
+
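A toy user-space simulation of the countdown gating described above (illustrative only; it assumes an ITR write happens on every interrupt where stats are used, and it shows only how the following interrupts are discarded, not the full decision logic in ice_update_ena_itr()):

	#include <stdio.h>

	#define ITR_COUNTDOWN_START	3

	int main(void)
	{
		unsigned int itr_countdown = 0;
		int irq;

		for (irq = 0; irq < 6; irq++) {
			if (itr_countdown) {
				/* Mirrors the early exit in ice_update_itr():
				 * stats right after a reprogram are discarded.
				 */
				printf("irq %d: countdown %u, stats ignored\n",
				       irq, itr_countdown--);
				continue;
			}
			printf("irq %d: stats used, ITR may be reprogrammed\n",
			       irq);
			itr_countdown = ITR_COUNTDOWN_START;
		}
		return 0;
	}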
/**
* ice_update_ena_itr - Update ITR and re-enable MSIX interrupt
* @vsi: the VSI associated with the q_vector
@@ -1117,10 +1356,14 @@ static u32 ice_buildreg_itr(int itr_idx, u16 reg_itr)
static void
ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
{
- struct ice_hw *hw = &vsi->back->hw;
- struct ice_ring_container *rc;
+ struct ice_ring_container *tx = &q_vector->tx;
+ struct ice_ring_container *rx = &q_vector->rx;
u32 itr_val;
+ /* This will do nothing if dynamic updates are not enabled */
+ ice_update_itr(q_vector, tx);
+ ice_update_itr(q_vector, rx);
+
/* This block of logic allows us to get away with only updating
* one ITR value with each interrupt. The idea is to perform a
* pseudo-lazy update with the following criteria.
@@ -1129,35 +1372,36 @@ ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
* 2. If we must reduce an ITR that is given highest priority.
* 3. We then give priority to increasing ITR based on amount.
*/
- if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
- rc = &q_vector->rx;
+ if (rx->target_itr < rx->current_itr) {
/* Rx ITR needs to be reduced, this is highest priority */
- itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
- rc->current_itr = rc->target_itr;
- } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
- ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
- (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
- rc = &q_vector->tx;
+ itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr);
+ rx->current_itr = rx->target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if ((tx->target_itr < tx->current_itr) ||
+ ((rx->target_itr - rx->current_itr) <
+ (tx->target_itr - tx->current_itr))) {
/* Tx ITR needs to be reduced, this is second priority
* Tx ITR needs to be increased more than Rx, fourth priority
*/
- itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
- rc->current_itr = rc->target_itr;
- } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
- rc = &q_vector->rx;
+ itr_val = ice_buildreg_itr(tx->itr_idx, tx->target_itr);
+ tx->current_itr = tx->target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if (rx->current_itr != rx->target_itr) {
/* Rx ITR needs to be increased, third priority */
- itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
- rc->current_itr = rc->target_itr;
+ itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr);
+ rx->current_itr = rx->target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else {
/* Still have to re-enable the interrupts */
itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
+ if (q_vector->itr_countdown)
+ q_vector->itr_countdown--;
}
- if (!test_bit(__ICE_DOWN, vsi->state)) {
- int vector = vsi->hw_base_vector + q_vector->v_idx;
-
- wr32(hw, GLINT_DYN_CTL(vector), itr_val);
- }
+ if (!test_bit(__ICE_DOWN, vsi->state))
+ wr32(&vsi->back->hw,
+ GLINT_DYN_CTL(vsi->hw_base_vector + q_vector->v_idx),
+ itr_val);
}
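The pseudo-lazy priority scheme above can be condensed into a small standalone decision helper (illustrative only; pick_itr() and struct itr_pair are not driver code):

	#include <stdio.h>

	struct itr_pair {
		unsigned int current_itr;
		unsigned int target_itr;
	};

	/* Returns which ITR gets programmed this interrupt: 'r' for Rx,
	 * 't' for Tx, or 'n' for none (just re-enable the interrupt).
	 */
	static char pick_itr(const struct itr_pair *rx, const struct itr_pair *tx)
	{
		if (rx->target_itr < rx->current_itr)
			return 'r';	/* 1. Rx decrease: highest priority */
		if (tx->target_itr < tx->current_itr ||
		    (rx->target_itr - rx->current_itr) <
		    (tx->target_itr - tx->current_itr))
			return 't';	/* 2. Tx decrease, or 4. larger Tx increase */
		if (rx->current_itr != rx->target_itr)
			return 'r';	/* 3. Rx increase */
		return 'n';
	}

	int main(void)
	{
		struct itr_pair rx = { .current_itr = 50, .target_itr = 62 };
		struct itr_pair tx = { .current_itr = 122, .target_itr = 250 };

		/* Both want to grow but Tx grows by more, so Tx is written this
		 * interrupt and Rx catches up on a later one.
		 */
		printf("program %c ITR this interrupt\n", pick_itr(&rx, &tx));
		return 0;
	}

Only one ITR is ever written per interrupt, and whenever one is written itr_countdown is re-armed so that ice_update_itr() ignores the next few, potentially early, interrupts.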
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index bd446ed423e5..69625857c482 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -133,6 +133,12 @@ enum ice_rx_dtype {
#define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */
#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~ICE_ITR_MASK)
+#define ICE_ITR_ADAPTIVE_MIN_INC 0x0002
+#define ICE_ITR_ADAPTIVE_MIN_USECS 0x0002
+#define ICE_ITR_ADAPTIVE_MAX_USECS 0x00FA
+#define ICE_ITR_ADAPTIVE_LATENCY 0x8000
+#define ICE_ITR_ADAPTIVE_BULK 0x0000
+
#define ICE_DFLT_INTRL 0
/* Legacy or Advanced Mode Queue */
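For reference, the new ICE_ITR_ADAPTIVE_* values encode both a usec count and a mode flag in the same u16; a minimal user-space sketch (only the constants come from the hunk above):

	#include <stdio.h>

	#define ICE_ITR_MASK			0x1FFE
	#define ICE_ITR_ADAPTIVE_MIN_USECS	0x0002
	#define ICE_ITR_ADAPTIVE_MAX_USECS	0x00FA	/* 250 usecs, ~4K ints/sec floor */
	#define ICE_ITR_ADAPTIVE_LATENCY	0x8000	/* mode flag, outside ICE_ITR_MASK */
	#define ICE_ITR_ADAPTIVE_BULK		0x0000

	int main(void)
	{
		unsigned int itr = ICE_ITR_ADAPTIVE_LATENCY | 0x20;	/* latency mode, 32 usecs */

		/* The flag sits above the bits covered by ICE_ITR_MASK, so
		 * masking recovers the pure usec value before it is written
		 * to hardware by ice_buildreg_itr().
		 */
		printf("mode = %s, usecs = %u\n",
		       (itr & ICE_ITR_ADAPTIVE_LATENCY) ? "latency" : "bulk",
		       itr & ICE_ITR_MASK);
		return 0;
	}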