summaryrefslogtreecommitdiff
path: root/drivers/cpuidle/governors/menu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cpuidle/governors/menu.c')
-rw-r--r--drivers/cpuidle/governors/menu.c252
1 files changed, 134 insertions, 118 deletions
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 28363bfa3e4c..64d6f7a1c776 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -41,7 +41,7 @@
* the C state is required to actually break even on this cost. CPUIDLE
* provides us this duration in the "target_residency" field. So all that we
* need is a good prediction of how long we'll be idle. Like the traditional
- * menu governor, we start with the actual known "next timer event" time.
+ * menu governor, we take the actual known "next timer event" time.
*
* Since there are other source of wakeups (interrupts for example) than
* the next timer event, this estimation is rather optimistic. To get a
@@ -50,30 +50,21 @@
* duration always was 50% of the next timer tick, the correction factor will
* be 0.5.
*
- * menu uses a running average for this correction factor, however it uses a
- * set of factors, not just a single factor. This stems from the realization
- * that the ratio is dependent on the order of magnitude of the expected
- * duration; if we expect 500 milliseconds of idle time the likelihood of
- * getting an interrupt very early is much higher than if we expect 50 micro
- * seconds of idle time. A second independent factor that has big impact on
- * the actual factor is if there is (disk) IO outstanding or not.
- * (as a special twist, we consider every sleep longer than 50 milliseconds
- * as perfect; there are no power gains for sleeping longer than this)
- *
- * For these two reasons we keep an array of 12 independent factors, that gets
- * indexed based on the magnitude of the expected duration as well as the
- * "is IO outstanding" property.
+ * menu uses a running average for this correction factor, but it uses a set of
+ * factors, not just a single factor. This stems from the realization that the
+ * ratio is dependent on the order of magnitude of the expected duration; if we
+ * expect 500 milliseconds of idle time the likelihood of getting an interrupt
+ * very early is much higher than if we expect 50 micro seconds of idle time.
+ * For this reason, menu keeps an array of 6 independent factors, that gets
+ * indexed based on the magnitude of the expected duration.
*
* Repeatable-interval-detector
* ----------------------------
* There are some cases where "next timer" is a completely unusable predictor:
* Those cases where the interval is fixed, for example due to hardware
- * interrupt mitigation, but also due to fixed transfer rate devices such as
- * mice.
+ * interrupt mitigation, but also due to fixed transfer rate devices like mice.
* For this, we use a different predictor: We track the duration of the last 8
- * intervals and if the stand deviation of these 8 intervals is below a
- * threshold value, we use the average of these intervals as prediction.
- *
+ * intervals and use them to estimate the duration of the next one.
*/
struct menu_device {
@@ -106,6 +97,14 @@ static inline int which_bucket(u64 duration_ns)
static DEFINE_PER_CPU(struct menu_device, menu_devices);
+static void menu_update_intervals(struct menu_device *data, unsigned int interval_us)
+{
+ /* Update the repeating-pattern data. */
+ data->intervals[data->interval_ptr++] = interval_us;
+ if (data->interval_ptr >= INTERVALS)
+ data->interval_ptr = 0;
+}
+
static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
/*
@@ -116,53 +115,52 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
*/
static unsigned int get_typical_interval(struct menu_device *data)
{
- int i, divisor;
- unsigned int min, max, thresh, avg;
- uint64_t sum, variance;
-
- thresh = INT_MAX; /* Discard outliers above this value */
+ s64 value, min_thresh = -1, max_thresh = UINT_MAX;
+ unsigned int max, min, divisor;
+ u64 avg, variance, avg_sq;
+ int i;
again:
-
- /* First calculate the average of past intervals */
- min = UINT_MAX;
+ /* Compute the average and variance of past intervals. */
max = 0;
- sum = 0;
+ min = UINT_MAX;
+ avg = 0;
+ variance = 0;
divisor = 0;
for (i = 0; i < INTERVALS; i++) {
- unsigned int value = data->intervals[i];
- if (value <= thresh) {
- sum += value;
- divisor++;
- if (value > max)
- max = value;
-
- if (value < min)
- min = value;
- }
+ value = data->intervals[i];
+ /*
+ * Discard the samples outside the interval between the min and
+ * max thresholds.
+ */
+ if (value <= min_thresh || value >= max_thresh)
+ continue;
+
+ divisor++;
+
+ avg += value;
+ variance += value * value;
+
+ if (value > max)
+ max = value;
+
+ if (value < min)
+ min = value;
}
if (!max)
return UINT_MAX;
- if (divisor == INTERVALS)
- avg = sum >> INTERVAL_SHIFT;
- else
- avg = div_u64(sum, divisor);
-
- /* Then try to determine variance */
- variance = 0;
- for (i = 0; i < INTERVALS; i++) {
- unsigned int value = data->intervals[i];
- if (value <= thresh) {
- int64_t diff = (int64_t)value - avg;
- variance += diff * diff;
- }
- }
- if (divisor == INTERVALS)
+ if (divisor == INTERVALS) {
+ avg >>= INTERVAL_SHIFT;
variance >>= INTERVAL_SHIFT;
- else
+ } else {
+ do_div(avg, divisor);
do_div(variance, divisor);
+ }
+
+ avg_sq = avg * avg;
+ variance -= avg_sq;
/*
* The typical interval is obtained when standard deviation is
@@ -177,25 +175,37 @@ again:
* Use this result only if there is no timer to wake us up sooner.
*/
if (likely(variance <= U64_MAX/36)) {
- if ((((u64)avg*avg > variance*36) && (divisor * 4 >= INTERVALS * 3))
- || variance <= 400) {
+ if ((avg_sq > variance * 36 && divisor * 4 >= INTERVALS * 3) ||
+ variance <= 400)
return avg;
- }
}
/*
- * If we have outliers to the upside in our distribution, discard
- * those by setting the threshold to exclude these outliers, then
+ * If there are outliers, discard them by setting thresholds to exclude
+ * data points at a large enough distance from the average, then
* calculate the average and standard deviation again. Once we get
- * down to the bottom 3/4 of our samples, stop excluding samples.
+ * down to the last 3/4 of our samples, stop excluding samples.
*
* This can deal with workloads that have long pauses interspersed
* with sporadic activity with a bunch of short pauses.
+ *
+ * However, if the number of remaining samples is too small to exclude
+ * any more outliers, allow the deepest available idle state to be
+ * selected because there are systems where the time spent by CPUs in
+ * deep idle states is correlated to the maximum frequency the CPUs
+ * can get to. On those systems, shallow idle states should be avoided
+ * unless there is a clear indication that the given CPU is most likley
+ * going to be woken up shortly.
*/
- if ((divisor * 4) <= INTERVALS * 3)
+ if (divisor * 4 <= INTERVALS * 3)
return UINT_MAX;
- thresh = max - 1;
+ /* Update the thresholds for the next round. */
+ if (avg - min > max - avg)
+ min_thresh = min;
+ else
+ max_thresh = max;
+
goto again;
}
@@ -217,6 +227,14 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
if (data->needs_update) {
menu_update(drv, dev);
data->needs_update = 0;
+ } else if (!dev->last_residency_ns) {
+ /*
+ * This happens when the driver rejects the previously selected
+ * idle state and returns an error, so update the recent
+ * intervals table to prevent invalid information from being
+ * used going forward.
+ */
+ menu_update_intervals(data, UINT_MAX);
}
/* Find the shortest expected idle interval. */
@@ -250,7 +268,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
*/
data->next_timer_ns = KTIME_MAX;
delta_tick = TICK_NSEC / 2;
- data->bucket = which_bucket(KTIME_MAX);
+ data->bucket = BUCKETS - 1;
}
if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
@@ -266,20 +284,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
return 0;
}
- if (tick_nohz_tick_stopped()) {
- /*
- * If the tick is already stopped, the cost of possible short
- * idle duration misprediction is much higher, because the CPU
- * may be stuck in a shallow idle state for a long time as a
- * result of it. In that case say we might mispredict and use
- * the known time till the closest timer event for the idle
- * state selection.
- */
- if (predicted_ns < TICK_NSEC)
- predicted_ns = data->next_timer_ns;
- } else if (latency_req > predicted_ns) {
- latency_req = predicted_ns;
- }
+ /*
+ * If the tick is already stopped, the cost of possible short idle
+ * duration misprediction is much higher, because the CPU may be stuck
+ * in a shallow idle state for a long time as a result of it. In that
+ * case, say we might mispredict and use the known time till the closest
+ * timer event for the idle state selection.
+ */
+ if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC)
+ predicted_ns = data->next_timer_ns;
/*
* Find the idle state with the lowest power while satisfying
@@ -295,48 +308,54 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
if (idx == -1)
idx = i; /* first enabled state */
- if (s->target_residency_ns > predicted_ns) {
- /*
- * Use a physical idle state, not busy polling, unless
- * a timer is going to trigger soon enough.
- */
- if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
- s->exit_latency_ns <= latency_req &&
- s->target_residency_ns <= data->next_timer_ns) {
- predicted_ns = s->target_residency_ns;
- idx = i;
- break;
- }
- if (predicted_ns < TICK_NSEC)
- break;
-
- if (!tick_nohz_tick_stopped()) {
- /*
- * If the state selected so far is shallow,
- * waking up early won't hurt, so retain the
- * tick in that case and let the governor run
- * again in the next iteration of the loop.
- */
- predicted_ns = drv->states[idx].target_residency_ns;
- break;
- }
+ if (s->exit_latency_ns > latency_req)
+ break;
- /*
- * If the state selected so far is shallow and this
- * state's target residency matches the time till the
- * closest timer event, select this one to avoid getting
- * stuck in the shallow one for too long.
- */
- if (drv->states[idx].target_residency_ns < TICK_NSEC &&
- s->target_residency_ns <= delta_tick)
- idx = i;
+ if (s->target_residency_ns <= predicted_ns) {
+ idx = i;
+ continue;
+ }
- return idx;
+ /*
+ * Use a physical idle state instead of busy polling so long as
+ * its target residency is below the residency threshold, its
+ * exit latency is not greater than the predicted idle duration,
+ * and the next timer doesn't expire soon.
+ */
+ if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
+ s->target_residency_ns < RESIDENCY_THRESHOLD_NS &&
+ s->target_residency_ns <= data->next_timer_ns &&
+ s->exit_latency_ns <= predicted_ns) {
+ predicted_ns = s->target_residency_ns;
+ idx = i;
+ break;
}
- if (s->exit_latency_ns > latency_req)
+
+ if (predicted_ns < TICK_NSEC)
break;
- idx = i;
+ if (!tick_nohz_tick_stopped()) {
+ /*
+ * If the state selected so far is shallow, waking up
+ * early won't hurt, so retain the tick in that case and
+ * let the governor run again in the next iteration of
+ * the idle loop.
+ */
+ predicted_ns = drv->states[idx].target_residency_ns;
+ break;
+ }
+
+ /*
+ * If the state selected so far is shallow and this state's
+ * target residency matches the time till the closest timer
+ * event, select this one to avoid getting stuck in the shallow
+ * one for too long.
+ */
+ if (drv->states[idx].target_residency_ns < TICK_NSEC &&
+ s->target_residency_ns <= delta_tick)
+ idx = i;
+
+ return idx;
}
if (idx == -1)
@@ -477,10 +496,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
data->correction_factor[data->bucket] = new_factor;
- /* update the repeating-pattern data */
- data->intervals[data->interval_ptr++] = ktime_to_us(measured_ns);
- if (data->interval_ptr >= INTERVALS)
- data->interval_ptr = 0;
+ menu_update_intervals(data, ktime_to_us(measured_ns));
}
/**