summaryrefslogtreecommitdiff
path: root/kernel/sched
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/core.c2
-rw-r--r--kernel/sched/cpufreq_schedutil.c98
-rw-r--r--kernel/sched/cputime.c6
-rw-r--r--kernel/sched/deadline.c16
-rw-r--r--kernel/sched/fair.c8
-rw-r--r--kernel/sched/rt.c2
-rw-r--r--kernel/sched/sched.h10
-rw-r--r--kernel/sched/wait.c7
8 files changed, 98 insertions, 51 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 17c667b427b4..0869b20fba81 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2069,7 +2069,7 @@ out:
/**
* try_to_wake_up_local - try to wake up a local task with rq lock held
* @p: the thread to be awakened
- * @cookie: context's cookie for pinning
+ * @rf: request-queue flags for pinning
*
* Put @p on the run-queue if it's not already there. The caller must
* ensure that this_rq() is locked, @p is bound to this_rq() and not
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 29a397067ffa..9209d83ecdcf 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -52,9 +52,11 @@ struct sugov_policy {
struct sugov_cpu {
struct update_util_data update_util;
struct sugov_policy *sg_policy;
+ unsigned int cpu;
- unsigned long iowait_boost;
- unsigned long iowait_boost_max;
+ bool iowait_boost_pending;
+ unsigned int iowait_boost;
+ unsigned int iowait_boost_max;
u64 last_update;
/* The fields below are only needed when sharing a policy. */
@@ -76,6 +78,26 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
s64 delta_ns;
+ /*
+ * Since cpufreq_update_util() is called with rq->lock held for
+ * the @target_cpu, our per-cpu data is fully serialized.
+ *
+ * However, drivers cannot in general deal with cross-cpu
+ * requests, so while get_next_freq() will work, our
+ * sugov_update_commit() call may not for the fast switching platforms.
+ *
+ * Hence stop here for remote requests if they aren't supported
+ * by the hardware, as calculating the frequency is pointless if
+ * we cannot in fact act on it.
+ *
+ * For the slow switching platforms, the kthread is always scheduled on
+ * the right set of CPUs and any CPU can find the next frequency and
+ * schedule the kthread.
+ */
+ if (sg_policy->policy->fast_switch_enabled &&
+ !cpufreq_can_do_remote_dvfs(sg_policy->policy))
+ return false;
+
if (sg_policy->work_in_progress)
return false;
@@ -106,7 +128,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
if (policy->fast_switch_enabled) {
next_freq = cpufreq_driver_fast_switch(policy, next_freq);
- if (next_freq == CPUFREQ_ENTRY_INVALID)
+ if (!next_freq)
return;
policy->cur = next_freq;
@@ -154,12 +176,12 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
return cpufreq_driver_resolve_freq(policy, freq);
}
-static void sugov_get_util(unsigned long *util, unsigned long *max)
+static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu)
{
- struct rq *rq = this_rq();
+ struct rq *rq = cpu_rq(cpu);
unsigned long cfs_max;
- cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id());
+ cfs_max = arch_scale_cpu_capacity(NULL, cpu);
*util = min(rq->cfs.avg.util_avg, cfs_max);
*max = cfs_max;
@@ -169,30 +191,54 @@ static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
unsigned int flags)
{
if (flags & SCHED_CPUFREQ_IOWAIT) {
- sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+ if (sg_cpu->iowait_boost_pending)
+ return;
+
+ sg_cpu->iowait_boost_pending = true;
+
+ if (sg_cpu->iowait_boost) {
+ sg_cpu->iowait_boost <<= 1;
+ if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
+ sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+ } else {
+ sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+ }
} else if (sg_cpu->iowait_boost) {
s64 delta_ns = time - sg_cpu->last_update;
/* Clear iowait_boost if the CPU apprears to have been idle. */
- if (delta_ns > TICK_NSEC)
+ if (delta_ns > TICK_NSEC) {
sg_cpu->iowait_boost = 0;
+ sg_cpu->iowait_boost_pending = false;
+ }
}
}
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
unsigned long *max)
{
- unsigned long boost_util = sg_cpu->iowait_boost;
- unsigned long boost_max = sg_cpu->iowait_boost_max;
+ unsigned int boost_util, boost_max;
- if (!boost_util)
+ if (!sg_cpu->iowait_boost)
return;
+ if (sg_cpu->iowait_boost_pending) {
+ sg_cpu->iowait_boost_pending = false;
+ } else {
+ sg_cpu->iowait_boost >>= 1;
+ if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+ sg_cpu->iowait_boost = 0;
+ return;
+ }
+ }
+
+ boost_util = sg_cpu->iowait_boost;
+ boost_max = sg_cpu->iowait_boost_max;
+
if (*util * boost_max < *max * boost_util) {
*util = boost_util;
*max = boost_max;
}
- sg_cpu->iowait_boost >>= 1;
}
#ifdef CONFIG_NO_HZ_COMMON
@@ -229,7 +275,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
if (flags & SCHED_CPUFREQ_RT_DL) {
next_f = policy->cpuinfo.max_freq;
} else {
- sugov_get_util(&util, &max);
+ sugov_get_util(&util, &max, sg_cpu->cpu);
sugov_iowait_boost(sg_cpu, &util, &max);
next_f = get_next_freq(sg_policy, util, max);
/*
@@ -264,6 +310,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
delta_ns = time - j_sg_cpu->last_update;
if (delta_ns > TICK_NSEC) {
j_sg_cpu->iowait_boost = 0;
+ j_sg_cpu->iowait_boost_pending = false;
continue;
}
if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
@@ -290,7 +337,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
unsigned long util, max;
unsigned int next_f;
- sugov_get_util(&util, &max);
+ sugov_get_util(&util, &max, sg_cpu->cpu);
raw_spin_lock(&sg_policy->update_lock);
@@ -445,7 +492,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy)
}
sg_policy->thread = thread;
- kthread_bind_mask(thread, policy->related_cpus);
+
+ /* Kthread is bound to all CPUs by default */
+ if (!policy->dvfs_possible_from_any_cpu)
+ kthread_bind_mask(thread, policy->related_cpus);
+
init_irq_work(&sg_policy->irq_work, sugov_irq_work);
mutex_init(&sg_policy->work_lock);
@@ -528,16 +579,7 @@ static int sugov_init(struct cpufreq_policy *policy)
goto stop_kthread;
}
- if (policy->transition_delay_us) {
- tunables->rate_limit_us = policy->transition_delay_us;
- } else {
- unsigned int lat;
-
- tunables->rate_limit_us = LATENCY_MULTIPLIER;
- lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
- if (lat)
- tunables->rate_limit_us *= lat;
- }
+ tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);
policy->governor_data = sg_policy;
sg_policy->tunables = tunables;
@@ -655,6 +697,7 @@ static void sugov_limits(struct cpufreq_policy *policy)
static struct cpufreq_governor schedutil_gov = {
.name = "schedutil",
.owner = THIS_MODULE,
+ .dynamic_switching = true,
.init = sugov_init,
.exit = sugov_exit,
.start = sugov_start,
@@ -671,6 +714,11 @@ struct cpufreq_governor *cpufreq_default_governor(void)
static int __init sugov_register(void)
{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(sugov_cpu, cpu).cpu = cpu;
+
return cpufreq_register_governor(&schedutil_gov);
}
fs_initcall(sugov_register);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 6e3ea4ac1bda..14d2dbf97c53 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -683,7 +683,7 @@ static u64 vtime_delta(struct vtime *vtime)
{
unsigned long long clock;
- clock = sched_clock_cpu(smp_processor_id());
+ clock = sched_clock();
if (clock < vtime->starttime)
return 0;
@@ -814,7 +814,7 @@ void arch_vtime_task_switch(struct task_struct *prev)
write_seqcount_begin(&vtime->seqcount);
vtime->state = VTIME_SYS;
- vtime->starttime = sched_clock_cpu(smp_processor_id());
+ vtime->starttime = sched_clock();
write_seqcount_end(&vtime->seqcount);
}
@@ -826,7 +826,7 @@ void vtime_init_idle(struct task_struct *t, int cpu)
local_irq_save(flags);
write_seqcount_begin(&vtime->seqcount);
vtime->state = VTIME_SYS;
- vtime->starttime = sched_clock_cpu(cpu);
+ vtime->starttime = sched_clock();
write_seqcount_end(&vtime->seqcount);
local_irq_restore(flags);
}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a84299f44b5d..5c3bf4bd0327 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1136,7 +1136,7 @@ static void update_curr_dl(struct rq *rq)
}
/* kick cpufreq (see the comment in kernel/sched/sched.h). */
- cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);
+ cpufreq_update_util(rq, SCHED_CPUFREQ_DL);
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));
@@ -1392,17 +1392,19 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
struct sched_dl_entity *pi_se = &p->dl;
/*
- * Use the scheduling parameters of the top pi-waiter
- * task if we have one and its (absolute) deadline is
- * smaller than our one... OTW we keep our runtime and
- * deadline.
+ * Use the scheduling parameters of the top pi-waiter task if:
+ * - we have a top pi-waiter which is a SCHED_DEADLINE task AND
+ * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
+ * smaller than our deadline OR we are a !SCHED_DEADLINE task getting
+ * boosted due to a SCHED_DEADLINE pi-waiter).
+ * Otherwise we keep our runtime and deadline.
*/
- if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) {
+ if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
pi_se = &pi_task->dl;
} else if (!dl_prio(p->normal_prio)) {
/*
* Special case in which we have a !SCHED_DEADLINE task
- * that is going to be deboosted, but exceedes its
+ * that is going to be deboosted, but exceeds its
* runtime while doing so. No point in replenishing
* it, as it's going to return back to its original
* scheduling class after this.
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c95880e216f6..d378d02fdfcb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3278,7 +3278,9 @@ static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {}
static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
{
- if (&this_rq()->cfs == cfs_rq) {
+ struct rq *rq = rq_of(cfs_rq);
+
+ if (&rq->cfs == cfs_rq) {
/*
* There are a few boundary cases this might miss but it should
* get called often enough that that should (hopefully) not be
@@ -3295,7 +3297,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
*
* See cpu_util().
*/
- cpufreq_update_util(rq_of(cfs_rq), 0);
+ cpufreq_update_util(rq, 0);
}
}
@@ -4875,7 +4877,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
* passed.
*/
if (p->in_iowait)
- cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT);
+ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
for_each_sched_entity(se) {
if (se->on_rq)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 45caf937ef90..0af5ca9e3e3f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -970,7 +970,7 @@ static void update_curr_rt(struct rq *rq)
return;
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
- cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT);
+ cpufreq_update_util(rq, SCHED_CPUFREQ_RT);
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index eeef1a3086d1..aa9d5b87b4f8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2070,19 +2070,13 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
{
struct update_util_data *data;
- data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
+ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
+ cpu_of(rq)));
if (data)
data->func(data, rq_clock(rq), flags);
}
-
-static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
-{
- if (cpu_of(rq) == smp_processor_id())
- cpufreq_update_util(rq, flags);
-}
#else
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
-static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */
#ifdef arch_scale_freq_capacity
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 17f11c6b0a9f..d6afed6d0752 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -70,9 +70,10 @@ static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
list_for_each_entry_safe(curr, next, &wq_head->head, entry) {
unsigned flags = curr->flags;
-
- if (curr->func(curr, mode, wake_flags, key) &&
- (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
+ int ret = curr->func(curr, mode, wake_flags, key);
+ if (ret < 0)
+ break;
+ if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
}