Diffstat (limited to 'drivers/cpuidle/governors/menu.c')
-rw-r--r--  drivers/cpuidle/governors/menu.c  252
1 file changed, 134 insertions, 118 deletions
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 28363bfa3e4c..64d6f7a1c776 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -41,7 +41,7 @@
  * the C state is required to actually break even on this cost. CPUIDLE
  * provides us this duration in the "target_residency" field. So all that we
  * need is a good prediction of how long we'll be idle. Like the traditional
- * menu governor, we start with the actual known "next timer event" time.
+ * menu governor, we take the actual known "next timer event" time.
  *
  * Since there are other source of wakeups (interrupts for example) than
  * the next timer event, this estimation is rather optimistic. To get a
@@ -50,30 +50,21 @@
  * duration always was 50% of the next timer tick, the correction factor will
  * be 0.5.
  *
- * menu uses a running average for this correction factor, however it uses a
- * set of factors, not just a single factor. This stems from the realization
- * that the ratio is dependent on the order of magnitude of the expected
- * duration; if we expect 500 milliseconds of idle time the likelihood of
- * getting an interrupt very early is much higher than if we expect 50 micro
- * seconds of idle time. A second independent factor that has big impact on
- * the actual factor is if there is (disk) IO outstanding or not.
- * (as a special twist, we consider every sleep longer than 50 milliseconds
- * as perfect; there are no power gains for sleeping longer than this)
- *
- * For these two reasons we keep an array of 12 independent factors, that gets
- * indexed based on the magnitude of the expected duration as well as the
- * "is IO outstanding" property.
+ * menu uses a running average for this correction factor, but it uses a set of
+ * factors, not just a single factor. This stems from the realization that the
+ * ratio is dependent on the order of magnitude of the expected duration; if we
+ * expect 500 milliseconds of idle time the likelihood of getting an interrupt
+ * very early is much higher than if we expect 50 micro seconds of idle time.
+ * For this reason, menu keeps an array of 6 independent factors, that gets
+ * indexed based on the magnitude of the expected duration.
  *
  * Repeatable-interval-detector
  * ----------------------------
  * There are some cases where "next timer" is a completely unusable predictor:
  * Those cases where the interval is fixed, for example due to hardware
- * interrupt mitigation, but also due to fixed transfer rate devices such as
- * mice.
+ * interrupt mitigation, but also due to fixed transfer rate devices like mice.
  * For this, we use a different predictor: We track the duration of the last 8
- * intervals and if the stand deviation of these 8 intervals is below a
- * threshold value, we use the average of these intervals as prediction.
- *
+ * intervals and use them to estimate the duration of the next one.
  */
 
 struct menu_device {
@@ -106,6 +97,14 @@ static inline int which_bucket(u64 duration_ns)
 
 static DEFINE_PER_CPU(struct menu_device, menu_devices);
 
+static void menu_update_intervals(struct menu_device *data, unsigned int interval_us)
+{
+        /* Update the repeating-pattern data. */
+        data->intervals[data->interval_ptr++] = interval_us;
+        if (data->interval_ptr >= INTERVALS)
+                data->interval_ptr = 0;
+}
+
 static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
 
 /*
@@ -116,53 +115,52 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
  */
 static unsigned int get_typical_interval(struct menu_device *data)
 {
-        int i, divisor;
-        unsigned int min, max, thresh, avg;
-        uint64_t sum, variance;
-
-        thresh = INT_MAX; /* Discard outliers above this value */
+        s64 value, min_thresh = -1, max_thresh = UINT_MAX;
+        unsigned int max, min, divisor;
+        u64 avg, variance, avg_sq;
+        int i;
 
 again:
-
-        /* First calculate the average of past intervals */
-        min = UINT_MAX;
+        /* Compute the average and variance of past intervals. */
         max = 0;
-        sum = 0;
+        min = UINT_MAX;
+        avg = 0;
+        variance = 0;
         divisor = 0;
         for (i = 0; i < INTERVALS; i++) {
-                unsigned int value = data->intervals[i];
-                if (value <= thresh) {
-                        sum += value;
-                        divisor++;
-                        if (value > max)
-                                max = value;
-
-                        if (value < min)
-                                min = value;
-                }
+                value = data->intervals[i];
+                /*
+                 * Discard the samples outside the interval between the min and
+                 * max thresholds.
+                 */
+                if (value <= min_thresh || value >= max_thresh)
+                        continue;
+
+                divisor++;
+
+                avg += value;
+                variance += value * value;
+
+                if (value > max)
+                        max = value;
+
+                if (value < min)
+                        min = value;
         }
 
         if (!max)
                 return UINT_MAX;
 
-        if (divisor == INTERVALS)
-                avg = sum >> INTERVAL_SHIFT;
-        else
-                avg = div_u64(sum, divisor);
-
-        /* Then try to determine variance */
-        variance = 0;
-        for (i = 0; i < INTERVALS; i++) {
-                unsigned int value = data->intervals[i];
-                if (value <= thresh) {
-                        int64_t diff = (int64_t)value - avg;
-                        variance += diff * diff;
-                }
-        }
-        if (divisor == INTERVALS)
+        if (divisor == INTERVALS) {
+                avg >>= INTERVAL_SHIFT;
                 variance >>= INTERVAL_SHIFT;
-        else
+        } else {
+                do_div(avg, divisor);
                 do_div(variance, divisor);
+        }
+
+        avg_sq = avg * avg;
+        variance -= avg_sq;
 
         /*
          * The typical interval is obtained when standard deviation is
@@ -177,25 +175,37 @@ again:
          * Use this result only if there is no timer to wake us up sooner.
          */
         if (likely(variance <= U64_MAX/36)) {
-                if ((((u64)avg*avg > variance*36) && (divisor * 4 >= INTERVALS * 3))
-                                                || variance <= 400) {
+                if ((avg_sq > variance * 36 && divisor * 4 >= INTERVALS * 3) ||
+                    variance <= 400)
                         return avg;
-                }
         }
 
         /*
-         * If we have outliers to the upside in our distribution, discard
-         * those by setting the threshold to exclude these outliers, then
+         * If there are outliers, discard them by setting thresholds to exclude
+         * data points at a large enough distance from the average, then
          * calculate the average and standard deviation again. Once we get
-         * down to the bottom 3/4 of our samples, stop excluding samples.
+         * down to the last 3/4 of our samples, stop excluding samples.
          *
          * This can deal with workloads that have long pauses interspersed
          * with sporadic activity with a bunch of short pauses.
+         *
+         * However, if the number of remaining samples is too small to exclude
+         * any more outliers, allow the deepest available idle state to be
+         * selected because there are systems where the time spent by CPUs in
+         * deep idle states is correlated to the maximum frequency the CPUs
+         * can get to. On those systems, shallow idle states should be avoided
+         * unless there is a clear indication that the given CPU is most likely
+         * going to be woken up shortly.
          */
-        if ((divisor * 4) <= INTERVALS * 3)
+        if (divisor * 4 <= INTERVALS * 3)
                 return UINT_MAX;
 
-        thresh = max - 1;
+        /* Update the thresholds for the next round. */
+        if (avg - min > max - avg)
+                min_thresh = min;
+        else
+                max_thresh = max;
+
         goto again;
 }
 
@@ -217,6 +227,14 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
         if (data->needs_update) {
                 menu_update(drv, dev);
                 data->needs_update = 0;
+        } else if (!dev->last_residency_ns) {
+                /*
+                 * This happens when the driver rejects the previously selected
+                 * idle state and returns an error, so update the recent
+                 * intervals table to prevent invalid information from being
+                 * used going forward.
+                 */
+                menu_update_intervals(data, UINT_MAX);
         }
 
         /* Find the shortest expected idle interval. */
@@ -250,7 +268,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
                  */
                 data->next_timer_ns = KTIME_MAX;
                 delta_tick = TICK_NSEC / 2;
-                data->bucket = which_bucket(KTIME_MAX);
+                data->bucket = BUCKETS - 1;
         }
 
         if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
@@ -266,20 +284,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
                 return 0;
         }
 
-        if (tick_nohz_tick_stopped()) {
-                /*
-                 * If the tick is already stopped, the cost of possible short
-                 * idle duration misprediction is much higher, because the CPU
-                 * may be stuck in a shallow idle state for a long time as a
-                 * result of it. In that case say we might mispredict and use
-                 * the known time till the closest timer event for the idle
-                 * state selection.
-                 */
-                if (predicted_ns < TICK_NSEC)
-                        predicted_ns = data->next_timer_ns;
-        } else if (latency_req > predicted_ns) {
-                latency_req = predicted_ns;
-        }
+        /*
+         * If the tick is already stopped, the cost of possible short idle
+         * duration misprediction is much higher, because the CPU may be stuck
+         * in a shallow idle state for a long time as a result of it. In that
+         * case, say we might mispredict and use the known time till the closest
+         * timer event for the idle state selection.
+         */
+        if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC)
+                predicted_ns = data->next_timer_ns;
 
         /*
          * Find the idle state with the lowest power while satisfying
@@ -295,48 +308,54 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
                 if (idx == -1)
                         idx = i; /* first enabled state */
 
-                if (s->target_residency_ns > predicted_ns) {
-                        /*
-                         * Use a physical idle state, not busy polling, unless
-                         * a timer is going to trigger soon enough.
-                         */
-                        if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
-                            s->exit_latency_ns <= latency_req &&
-                            s->target_residency_ns <= data->next_timer_ns) {
-                                predicted_ns = s->target_residency_ns;
-                                idx = i;
-                                break;
-                        }
-                        if (predicted_ns < TICK_NSEC)
-                                break;
-
-                        if (!tick_nohz_tick_stopped()) {
-                                /*
-                                 * If the state selected so far is shallow,
-                                 * waking up early won't hurt, so retain the
-                                 * tick in that case and let the governor run
-                                 * again in the next iteration of the loop.
-                                 */
-                                predicted_ns = drv->states[idx].target_residency_ns;
-                                break;
-                        }
+                if (s->exit_latency_ns > latency_req)
+                        break;
 
-                        /*
-                         * If the state selected so far is shallow and this
-                         * state's target residency matches the time till the
-                         * closest timer event, select this one to avoid getting
-                         * stuck in the shallow one for too long.
-                         */
-                        if (drv->states[idx].target_residency_ns < TICK_NSEC &&
-                            s->target_residency_ns <= delta_tick)
-                                idx = i;
+                if (s->target_residency_ns <= predicted_ns) {
+                        idx = i;
+                        continue;
+                }
 
-                        return idx;
+                /*
+                 * Use a physical idle state instead of busy polling so long as
+                 * its target residency is below the residency threshold, its
+                 * exit latency is not greater than the predicted idle duration,
+                 * and the next timer doesn't expire soon.
+                 */
+                if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
+                    s->target_residency_ns < RESIDENCY_THRESHOLD_NS &&
+                    s->target_residency_ns <= data->next_timer_ns &&
+                    s->exit_latency_ns <= predicted_ns) {
+                        predicted_ns = s->target_residency_ns;
+                        idx = i;
+                        break;
                 }
-                if (s->exit_latency_ns > latency_req)
+
+                if (predicted_ns < TICK_NSEC)
                         break;
 
-                idx = i;
+                if (!tick_nohz_tick_stopped()) {
+                        /*
+                         * If the state selected so far is shallow, waking up
+                         * early won't hurt, so retain the tick in that case and
+                         * let the governor run again in the next iteration of
+                         * the idle loop.
+                         */
+                        predicted_ns = drv->states[idx].target_residency_ns;
+                        break;
+                }
+
+                /*
+                 * If the state selected so far is shallow and this state's
+                 * target residency matches the time till the closest timer
+                 * event, select this one to avoid getting stuck in the shallow
+                 * one for too long.
+                 */
+                if (drv->states[idx].target_residency_ns < TICK_NSEC &&
+                    s->target_residency_ns <= delta_tick)
+                        idx = i;
+
+                return idx;
         }
 
         if (idx == -1)
@@ -477,10 +496,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
         data->correction_factor[data->bucket] = new_factor;
 
-        /* update the repeating-pattern data */
-        data->intervals[data->interval_ptr++] = ktime_to_us(measured_ns);
-        if (data->interval_ptr >= INTERVALS)
-                data->interval_ptr = 0;
+        menu_update_intervals(data, ktime_to_us(measured_ns));
 }
 
 /**
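A few pieces of this patch are easier to see with some arithmetic outside the kernel tree.

First, the correction factor described in the rewritten header comment is a decaying running average kept per magnitude bucket. The sketch below is a simplified userspace model of that update, not the kernel code itself: the RESOLUTION and DECAY constants mirror the ones menu.c defines, while the function names and the standalone framing are invented for illustration.

#include <stdint.h>

#define RESOLUTION      1024    /* fixed-point scale, as in menu.c */
#define DECAY           8       /* aging divisor of the running average */

/*
 * Model of the per-bucket correction factor update: age the previous
 * value by DECAY, then add the latest ratio of measured idle time to
 * the predicted "next timer" time, scaled to fixed point.  At steady
 * state the factor converges to roughly DECAY * RESOLUTION * ratio,
 * which is why the prediction divides by RESOLUTION * DECAY.
 */
static unsigned int update_correction_factor(unsigned int factor,
                                             uint64_t measured_ns,
                                             uint64_t next_timer_ns)
{
        unsigned int new_factor = factor - factor / DECAY;

        if (measured_ns < next_timer_ns)
                new_factor += (unsigned int)(RESOLUTION * measured_ns /
                                             next_timer_ns);
        else
                new_factor += RESOLUTION;       /* slept the full interval */

        return new_factor;
}

/* Turn the learned factor back into a predicted idle time. */
static uint64_t apply_correction(uint64_t next_timer_ns, unsigned int factor)
{
        return next_timer_ns * factor / (RESOLUTION * DECAY);
}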
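Second, the reworked get_typical_interval() computes the average and variance in a single pass using the identity Var(x) = E[x^2] - (E[x])^2, which is where the new "avg_sq = avg * avg; variance -= avg_sq;" lines come from. The consistency check also avoids a square root: requiring the standard deviation to be at most a sixth of the average is the same as requiring avg^2 to exceed 36 * variance. Below is a self-contained userspace sketch of just this math, with the outlier trimming and the divisor-quorum check omitted and all names illustrative.

#include <stdint.h>
#include <stdio.h>

#define INTERVALS 8

/*
 * Userspace sketch of the single-pass math in get_typical_interval():
 * accumulate the sum and the sum of squares, derive the variance as
 * E[x^2] - E[x]^2, and test the spread against the average without
 * taking a square root.
 */
static uint64_t typical_interval(const uint32_t intervals[INTERVALS])
{
        uint64_t avg = 0, variance = 0, avg_sq;
        int i;

        for (i = 0; i < INTERVALS; i++) {
                avg += intervals[i];
                variance += (uint64_t)intervals[i] * intervals[i];
        }
        avg /= INTERVALS;               /* E[x] */
        variance /= INTERVALS;          /* E[x^2] */
        avg_sq = avg * avg;
        variance -= avg_sq;             /* Var(x) = E[x^2] - E[x]^2 */

        /* stddev below avg/6: compare avg^2 against 36 * variance. */
        if (avg_sq > variance * 36 || variance <= 400)
                return avg;             /* samples are consistent enough */

        return UINT32_MAX;              /* no usable pattern */
}

int main(void)
{
        /* Eight recent idle intervals in microseconds. */
        const uint32_t intervals[INTERVALS] = {
                980, 1020, 1000, 990, 1010, 1005, 995, 1000
        };

        printf("typical interval: %llu us\n",
               (unsigned long long)typical_interval(intervals));
        return 0;
}

Compiled on its own, this prints a typical interval of 1000 us for the sample set, since the deviations are tiny relative to the average.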
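The threshold update at the end of the function is also worth a worked example. Take eight recorded intervals {100, 100, 100, 100, 100, 100, 100, 10000} in microseconds. The first pass gives avg = 1337 with a huge variance, so there is no early return; since max - avg (8663) exceeds avg - min (1237), the new code sets max_thresh to 10000 and retries. The second pass sees seven samples of 100 (divisor * 4 = 28, still at least INTERVALS * 3 = 24), gets zero variance, and returns 100. The old "thresh = max - 1" scheme could only ever shave outliers off the top; the new min_thresh/max_thresh pair drops whichever extreme sits farther from the average, so abnormally short intervals can be excluded as well.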
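Finally, the restructured loop in menu_select() is easier to read once the tick and polling special cases are peeled away: walk the states from shallowest to deepest, stop as soon as a state's exit latency breaks the latency constraint, and keep the deepest state whose target residency still fits the predicted idle time. The following is a minimal model of just that core rule, with a hypothetical struct and function name and everything else omitted.

#include <stdint.h>

struct state {
        uint64_t target_residency_ns;
        uint64_t exit_latency_ns;
};

/*
 * Model of the core selection rule only: states are ordered from
 * shallowest to deepest, and deeper states have larger target
 * residency and exit latency.
 */
static int pick_state(const struct state *states, int count,
                      uint64_t predicted_ns, uint64_t latency_req_ns)
{
        int i, idx = -1;

        for (i = 0; i < count; i++) {
                if (states[i].exit_latency_ns > latency_req_ns)
                        break;          /* too expensive to wake up from */

                if (states[i].target_residency_ns <= predicted_ns)
                        idx = i;        /* deepest state that still fits */
        }

        return idx;
}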
