diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-30 17:01:51 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-30 17:01:51 -0700 |
commit | 642e53ead6aea8740a219ede509a5d138fd4f780 (patch) | |
tree | 5c4680d0c07315dab24fe7333c62f56bc19ec4e4 /kernel/sched/core.c | |
parent | 9b82f05f869a823d43ea4186f5f732f2924d3693 (diff) | |
parent | 313f16e2e35abb833eab5bdebc6ae30699adca18 (diff) |
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"The main changes in this cycle are:
- Various NUMA scheduling updates: harmonize the load-balancer and
NUMA placement logic to not work against each other. The intended
result is better locality, better utilization and fewer migrations.
- Introduce Thermal Pressure tracking and optimizations, to improve
task placement on thermally overloaded systems.
- Implement frequency invariant scheduler accounting on (some) x86
CPUs. This is done by observing and sampling the 'recent' CPU
frequency average at ~tick boundaries. The CPU provides this data
via the APERF/MPERF MSRs. This hopefully makes our capacity
estimates more precise and keeps tasks on the same CPU better even
if it might seem overloaded at a lower momentary frequency. (As
usual, turbo mode is a complication that we resolve by observing
the maximum frequency and renormalizing to it.)
- Add asymmetric CPU capacity wakeup scan to improve capacity
utilization on asymmetric topologies. (big.LITTLE systems)
- PSI fixes and optimizations.
- RT scheduling capacity awareness fixes & improvements.
- Optimize the CONFIG_RT_GROUP_SCHED constraints code.
- Misc fixes, cleanups and optimizations - see the changelog for
details"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (62 commits)
threads: Update PID limit comment according to futex UAPI change
sched/fair: Fix condition of avg_load calculation
sched/rt: cpupri_find: Trigger a full search as fallback
kthread: Do not preempt current task if it is going to call schedule()
sched/fair: Improve spreading of utilization
sched: Avoid scale real weight down to zero
psi: Move PF_MEMSTALL out of task->flags
MAINTAINERS: Add maintenance information for psi
psi: Optimize switching tasks inside shared cgroups
psi: Fix cpu.pressure for cpu.max and competing cgroups
sched/core: Distribute tasks within affinity masks
sched/fair: Fix enqueue_task_fair warning
thermal/cpu-cooling, sched/core: Move the arch_set_thermal_pressure() API to generic scheduler code
sched/rt: Remove unnecessary push for unfit tasks
sched/rt: Allow pulling unfitting task
sched/rt: Optimize cpupri_find() on non-heterogenous systems
sched/rt: Re-instate old behavior in select_task_rq_rt()
sched/rt: cpupri_find: Implement fallback mechanism for !fit case
sched/fair: Fix reordering of enqueue/dequeue_task_fair()
sched/fair: Fix runnable_avg for throttled cfs
...
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r-- | kernel/sched/core.c | 27 |
1 files changed, 23 insertions, 4 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1a9983da4408..c1f923d647ee 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -761,7 +761,6 @@ static void set_load_weight(struct task_struct *p, bool update_load) if (task_has_idle_policy(p)) { load->weight = scale_load(WEIGHT_IDLEPRIO); load->inv_weight = WMULT_IDLEPRIO; - p->se.runnable_weight = load->weight; return; } @@ -774,7 +773,6 @@ static void set_load_weight(struct task_struct *p, bool update_load) } else { load->weight = scale_load(sched_prio_to_weight[prio]); load->inv_weight = sched_prio_to_wmult[prio]; - p->se.runnable_weight = load->weight; } } @@ -1652,7 +1650,12 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, if (cpumask_equal(p->cpus_ptr, new_mask)) goto out; - dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); + /* + * Picking a ~random cpu helps in cases where we are changing affinity + * for groups of tasks (ie. cpuset), so that load balancing is not + * immediately required to distribute the tasks within their new mask. + */ + dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, new_mask); if (dest_cpu >= nr_cpu_ids) { ret = -EINVAL; goto out; @@ -3578,6 +3581,17 @@ unsigned long long task_sched_runtime(struct task_struct *p) return ns; } +DEFINE_PER_CPU(unsigned long, thermal_pressure); + +void arch_set_thermal_pressure(struct cpumask *cpus, + unsigned long th_pressure) +{ + int cpu; + + for_each_cpu(cpu, cpus) + WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); +} + /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. @@ -3588,12 +3602,16 @@ void scheduler_tick(void) struct rq *rq = cpu_rq(cpu); struct task_struct *curr = rq->curr; struct rq_flags rf; + unsigned long thermal_pressure; + arch_scale_freq_tick(); sched_clock_tick(); rq_lock(rq, &rf); update_rq_clock(rq); + thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq)); + update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure); curr->sched_class->task_tick(rq, curr, 0); calc_global_load_tick(rq); psi_task_tick(rq); @@ -3671,7 +3689,6 @@ static void sched_tick_remote(struct work_struct *work) if (cpu_is_offline(cpu)) goto out_unlock; - curr = rq->curr; update_rq_clock(rq); if (!is_idle_task(curr)) { @@ -4074,6 +4091,8 @@ static void __sched notrace __schedule(bool preempt) */ ++*switch_count; + psi_sched_switch(prev, next, !task_on_rq_queued(prev)); + trace_sched_switch(preempt, prev, next); /* Also unlocks the rq: */ |