From d1e7c2996e988866e7ceceb4641a0886885b7889 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 29 Oct 2020 12:12:46 +0100 Subject: cpufreq: schedutil: Always call driver if CPUFREQ_NEED_UPDATE_LIMITS is set Because sugov_update_next_freq() may skip a frequency update even if the need_freq_update flag has been set for the policy at hand, policy limits updates may not take effect as expected. For example, if the intel_pstate driver operates in the passive mode with HWP enabled, it needs to update the HWP min and max limits when the policy min and max limits change, respectively, but that may not happen if the target frequency does not change along with the limit at hand. In particular, if the policy min is changed first, causing the target frequency to be adjusted to it, and the policy max limit is changed later to the same value, the HWP max limit will not be updated to follow it as expected, because the target frequency is still equal to the policy min limit and it will not change until that limit is updated. To address this issue, modify get_next_freq() to let the driver callback run if the CPUFREQ_NEED_UPDATE_LIMITS cpufreq driver flag is set regardless of whether or not the new frequency to set is equal to the previous one. Fixes: f6ebbcf08f37 ("cpufreq: intel_pstate: Implement passive mode with HWP enabled") Reported-by: Zhang Rui Tested-by: Zhang Rui Cc: 5.9+ # 5.9+: 1c534352f47f cpufreq: Introduce CPUFREQ_NEED_UPDATE_LIMITS ... Cc: 5.9+ # 5.9+: a62f68f5ca53 cpufreq: Introduce cpufreq_driver_test_flags() Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/sched/cpufreq_schedutil.c') diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index e254745a82cb..c03a5775d019 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -102,7 +102,8 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time, unsigned int next_freq) { - if (sg_policy->next_freq == next_freq) + if (sg_policy->next_freq == next_freq && + !cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS)) return false; sg_policy->next_freq = next_freq; @@ -161,7 +162,8 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, freq = map_util_freq(util, freq, max); - if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update) + if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update && + !cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS)) return sg_policy->next_freq; sg_policy->need_freq_update = false; -- cgit From 23a881852f3eff6a7ba8d240b57de076763fdef9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 30 Oct 2020 12:51:08 +0530 Subject: cpufreq: schedutil: Don't skip freq update if need_freq_update is set The cpufreq policy's frequency limits (min/max) can get changed at any point of time, while schedutil is trying to update the next frequency. Though the schedutil governor has necessary locking and support in place to make sure we don't miss any of those updates, there is a corner case where the governor will find that the CPU is already running at the desired frequency and so may skip an update. For example, consider that the CPU can run at 1 GHz, 1.2 GHz and 1.4 GHz and is running at 1 GHz currently. Schedutil tries to update the frequency to 1.2 GHz, during this time the policy limits get changed as policy->min = 1.4 GHz. As schedutil (and cpufreq core) does clamp the frequency at various instances, we will eventually set the frequency to 1.4 GHz, while we will save 1.2 GHz in sg_policy->next_freq. Now lets say the policy limits get changed back at this time with policy->min as 1 GHz. The next time schedutil is invoked by the scheduler, we will reevaluate the next frequency (because need_freq_update will get set due to limits change event) and lets say we want to set the frequency to 1.2 GHz again. At this point sugov_update_next_freq() will find the next_freq == current_freq and will abort the update, while the CPU actually runs at 1.4 GHz. Until now need_freq_update was used as a flag to indicate that the policy's frequency limits have changed, and that we should consider the new limits while reevaluating the next frequency. This patch fixes the above mentioned issue by extending the purpose of the need_freq_update flag. If this flag is set now, the schedutil governor will not try to abort a frequency change even if next_freq == current_freq. As similar behavior is required in the case of CPUFREQ_NEED_UPDATE_LIMITS flag as well, need_freq_update will never be set to false if that flag is set for the driver. We also don't need to consider the need_freq_update flag in sugov_update_single() anymore to handle the special case of busy CPU, as we won't abort a frequency update anymore. Reported-by: zhuguangqing Suggested-by: Rafael J. Wysocki Signed-off-by: Viresh Kumar [ rjw: Rearrange code to avoid a branch ] Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'kernel/sched/cpufreq_schedutil.c') diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index c03a5775d019..d73bccde2720 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -102,9 +102,12 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time, unsigned int next_freq) { - if (sg_policy->next_freq == next_freq && - !cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS)) - return false; + if (!sg_policy->need_freq_update) { + if (sg_policy->next_freq == next_freq) + return false; + } else { + sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); + } sg_policy->next_freq = next_freq; sg_policy->last_freq_update_time = time; @@ -162,11 +165,9 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, freq = map_util_freq(util, freq, max); - if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update && - !cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS)) + if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update) return sg_policy->next_freq; - sg_policy->need_freq_update = false; sg_policy->cached_raw_freq = freq; return cpufreq_driver_resolve_freq(policy, freq); } @@ -442,7 +443,6 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long util, max; unsigned int next_f; - bool busy; unsigned int cached_freq = sg_policy->cached_raw_freq; sugov_iowait_boost(sg_cpu, time, flags); @@ -453,9 +453,6 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, if (!sugov_should_update_freq(sg_policy, time)) return; - /* Limits may have changed, don't skip frequency update */ - busy = !sg_policy->need_freq_update && sugov_cpu_is_busy(sg_cpu); - util = sugov_get_util(sg_cpu); max = sg_cpu->max; util = sugov_iowait_apply(sg_cpu, time, util, max); @@ -464,7 +461,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, * Do not reduce the frequency if the CPU has not been idle * recently, as the reduction is likely to be premature then. */ - if (busy && next_f < sg_policy->next_freq) { + if (sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) { next_f = sg_policy->next_freq; /* Restore cached freq as next_freq has changed */ @@ -829,9 +826,10 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->next_freq = 0; sg_policy->work_in_progress = false; sg_policy->limits_changed = false; - sg_policy->need_freq_update = false; sg_policy->cached_raw_freq = 0; + sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); + for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); -- cgit From 9a2a9ebc0a758d887ee06e067e9f7f0b36ff7574 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:25:57 +0100 Subject: cpufreq: Introduce governor flags A new cpufreq governor flag will be added subsequently, so replace the bool dynamic_switching fleid in struct cpufreq_governor with a flags field and introduce CPUFREQ_GOV_DYNAMIC_SWITCHING to set for the "dynamic switching" governors instead of it. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- kernel/sched/cpufreq_schedutil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched/cpufreq_schedutil.c') diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index d73bccde2720..97d318b0cd0c 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -881,7 +881,7 @@ static void sugov_limits(struct cpufreq_policy *policy) struct cpufreq_governor schedutil_gov = { .name = "schedutil", .owner = THIS_MODULE, - .dynamic_switching = true, + .flags = CPUFREQ_GOV_DYNAMIC_SWITCHING, .init = sugov_init, .exit = sugov_exit, .start = sugov_start, -- cgit