summaryrefslogtreecommitdiff
path: root/kernel/sched/core.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-03-30 17:01:51 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-03-30 17:01:51 -0700
commit642e53ead6aea8740a219ede509a5d138fd4f780 (patch)
tree5c4680d0c07315dab24fe7333c62f56bc19ec4e4 /kernel/sched/core.c
parent9b82f05f869a823d43ea4186f5f732f2924d3693 (diff)
parent313f16e2e35abb833eab5bdebc6ae30699adca18 (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: "The main changes in this cycle are: - Various NUMA scheduling updates: harmonize the load-balancer and NUMA placement logic to not work against each other. The intended result is better locality, better utilization and fewer migrations. - Introduce Thermal Pressure tracking and optimizations, to improve task placement on thermally overloaded systems. - Implement frequency invariant scheduler accounting on (some) x86 CPUs. This is done by observing and sampling the 'recent' CPU frequency average at ~tick boundaries. The CPU provides this data via the APERF/MPERF MSRs. This hopefully makes our capacity estimates more precise and keeps tasks on the same CPU better even if it might seem overloaded at a lower momentary frequency. (As usual, turbo mode is a complication that we resolve by observing the maximum frequency and renormalizing to it.) - Add asymmetric CPU capacity wakeup scan to improve capacity utilization on asymmetric topologies. (big.LITTLE systems) - PSI fixes and optimizations. - RT scheduling capacity awareness fixes & improvements. - Optimize the CONFIG_RT_GROUP_SCHED constraints code. - Misc fixes, cleanups and optimizations - see the changelog for details" * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (62 commits) threads: Update PID limit comment according to futex UAPI change sched/fair: Fix condition of avg_load calculation sched/rt: cpupri_find: Trigger a full search as fallback kthread: Do not preempt current task if it is going to call schedule() sched/fair: Improve spreading of utilization sched: Avoid scale real weight down to zero psi: Move PF_MEMSTALL out of task->flags MAINTAINERS: Add maintenance information for psi psi: Optimize switching tasks inside shared cgroups psi: Fix cpu.pressure for cpu.max and competing cgroups sched/core: Distribute tasks within affinity masks sched/fair: Fix enqueue_task_fair warning thermal/cpu-cooling, sched/core: Move the arch_set_thermal_pressure() API to generic scheduler code sched/rt: Remove unnecessary push for unfit tasks sched/rt: Allow pulling unfitting task sched/rt: Optimize cpupri_find() on non-heterogenous systems sched/rt: Re-instate old behavior in select_task_rq_rt() sched/rt: cpupri_find: Implement fallback mechanism for !fit case sched/fair: Fix reordering of enqueue/dequeue_task_fair() sched/fair: Fix runnable_avg for throttled cfs ...
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--kernel/sched/core.c27
1 files changed, 23 insertions, 4 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1a9983da4408..c1f923d647ee 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -761,7 +761,6 @@ static void set_load_weight(struct task_struct *p, bool update_load)
if (task_has_idle_policy(p)) {
load->weight = scale_load(WEIGHT_IDLEPRIO);
load->inv_weight = WMULT_IDLEPRIO;
- p->se.runnable_weight = load->weight;
return;
}
@@ -774,7 +773,6 @@ static void set_load_weight(struct task_struct *p, bool update_load)
} else {
load->weight = scale_load(sched_prio_to_weight[prio]);
load->inv_weight = sched_prio_to_wmult[prio];
- p->se.runnable_weight = load->weight;
}
}
@@ -1652,7 +1650,12 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
if (cpumask_equal(p->cpus_ptr, new_mask))
goto out;
- dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
+ /*
+ * Picking a ~random cpu helps in cases where we are changing affinity
+ * for groups of tasks (ie. cpuset), so that load balancing is not
+ * immediately required to distribute the tasks within their new mask.
+ */
+ dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, new_mask);
if (dest_cpu >= nr_cpu_ids) {
ret = -EINVAL;
goto out;
@@ -3578,6 +3581,17 @@ unsigned long long task_sched_runtime(struct task_struct *p)
return ns;
}
+DEFINE_PER_CPU(unsigned long, thermal_pressure);
+
+void arch_set_thermal_pressure(struct cpumask *cpus,
+ unsigned long th_pressure)
+{
+ int cpu;
+
+ for_each_cpu(cpu, cpus)
+ WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
+}
+
/*
* This function gets called by the timer code, with HZ frequency.
* We call it with interrupts disabled.
@@ -3588,12 +3602,16 @@ void scheduler_tick(void)
struct rq *rq = cpu_rq(cpu);
struct task_struct *curr = rq->curr;
struct rq_flags rf;
+ unsigned long thermal_pressure;
+ arch_scale_freq_tick();
sched_clock_tick();
rq_lock(rq, &rf);
update_rq_clock(rq);
+ thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
+ update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure);
curr->sched_class->task_tick(rq, curr, 0);
calc_global_load_tick(rq);
psi_task_tick(rq);
@@ -3671,7 +3689,6 @@ static void sched_tick_remote(struct work_struct *work)
if (cpu_is_offline(cpu))
goto out_unlock;
- curr = rq->curr;
update_rq_clock(rq);
if (!is_idle_task(curr)) {
@@ -4074,6 +4091,8 @@ static void __sched notrace __schedule(bool preempt)
*/
++*switch_count;
+ psi_sched_switch(prev, next, !task_on_rq_queued(prev));
+
trace_sched_switch(preempt, prev, next);
/* Also unlocks the rq: */