path: root/kernel/sched
author     Mel Gorman <mgorman@techsingularity.net>   2020-02-24 09:52:21 +0000
committer  Ingo Molnar <mingo@kernel.org>             2020-02-24 11:36:39 +0100
commit     5fb52dd93a2fe9a738f730de9da108bd1f6c30d0 (patch)
tree       e2834aabfbc0dc1d5487e3e3af318c2efbc80db6 /kernel/sched
parent     ff7db0bf24db919f69121bf5df8f3cb6d79f49af (diff)
sched/numa: Find an alternative idle CPU if the CPU is part of an active NUMA balance
Multiple tasks can attempt to select an idle CPU but fail because numa_migrate_on is already set and the migration fails. Instead of failing, scan for an alternative idle CPU. select_idle_sibling is not used because it requires IRQs to be disabled and it ignores numa_migrate_on, allowing multiple tasks to stack. This scan may still fail even if there are idle candidate CPUs, due to races, but if that occurs it is better that a task stays on an available CPU than moves to a contended one.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Phil Auld <pauld@redhat.com>
Cc: Hillf Danton <hdanton@sina.com>
Link: https://lore.kernel.org/r/20200224095223.13361-12-mgorman@techsingularity.net
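The change to task_numa_assign() below boils down to: try to claim the destination runqueue with xchg(&rq->numa_migrate_on, 1); if it is already claimed, wrap-scan the destination node's CPUs for another idle, allowed CPU and try to claim that instead. A minimal userspace sketch of that claim-or-rescan pattern follows; it is not kernel code, NR_CPUS, cpu_idle[] and claim_dst_cpu() are illustrative stand-ins for cpumask_of_node()/for_each_cpu_wrap(), idle_cpu() and rq->numa_migrate_on, and the p->cpus_ptr affinity check is omitted.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

static atomic_int numa_migrate_on[NR_CPUS];	/* models rq->numa_migrate_on */
static bool cpu_idle[NR_CPUS];			/* models idle_cpu(cpu) */

/* Claim dst_cpu, or wrap-scan from it for another idle CPU to claim. */
static int claim_dst_cpu(int dst_cpu, int best_cpu)
{
	int i;

	if (!atomic_exchange(&numa_migrate_on[dst_cpu], 1))
		return dst_cpu;			/* fast path: claim succeeded */

	for (i = 0; i < NR_CPUS; i++) {
		int cpu = (dst_cpu + i) % NR_CPUS;	/* like for_each_cpu_wrap() */

		if (cpu == best_cpu || !cpu_idle[cpu])
			continue;		/* skip best_cpu and busy CPUs */

		if (!atomic_exchange(&numa_migrate_on[cpu], 1))
			return cpu;		/* claimed an alternative idle CPU */
	}

	return -1;				/* racing tasks took every candidate */
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		cpu_idle[cpu] = true;

	atomic_store(&numa_migrate_on[2], 1);	/* CPU 2 already part of a NUMA balance */
	printf("claimed CPU %d\n", claim_dst_cpu(2, 0));	/* prints: claimed CPU 3 */
	return 0;
}

As in the patch, the atomic exchange is what serializes concurrent claimers: only the task whose exchange observes 0 proceeds with the migration, and everyone else moves on to the next idle candidate instead of stacking on the same CPU.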
Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/fair.c  40
1 file changed, 22 insertions(+), 18 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2da21f44e4d0..050c1b19bfc0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1624,15 +1624,34 @@ static void task_numa_assign(struct task_numa_env *env,
 {
 	struct rq *rq = cpu_rq(env->dst_cpu);
 
-	/* Bail out if run-queue part of active NUMA balance. */
-	if (env->best_cpu != env->dst_cpu && xchg(&rq->numa_migrate_on, 1))
+	/* Check if run-queue part of active NUMA balance. */
+	if (env->best_cpu != env->dst_cpu && xchg(&rq->numa_migrate_on, 1)) {
+		int cpu;
+		int start = env->dst_cpu;
+
+		/* Find alternative idle CPU. */
+		for_each_cpu_wrap(cpu, cpumask_of_node(env->dst_nid), start) {
+			if (cpu == env->best_cpu || !idle_cpu(cpu) ||
+			    !cpumask_test_cpu(cpu, env->p->cpus_ptr)) {
+				continue;
+			}
+
+			env->dst_cpu = cpu;
+			rq = cpu_rq(env->dst_cpu);
+			if (!xchg(&rq->numa_migrate_on, 1))
+				goto assign;
+		}
+
+		/* Failed to find an alternative idle CPU */
 		return;
+	}
 
+assign:
 	/*
 	 * Clear previous best_cpu/rq numa-migrate flag, since task now
 	 * found a better CPU to move/swap.
 	 */
-	if (env->best_cpu != -1) {
+	if (env->best_cpu != -1 && env->best_cpu != env->dst_cpu) {
 		rq = cpu_rq(env->best_cpu);
 		WRITE_ONCE(rq->numa_migrate_on, 0);
 	}
@@ -1806,21 +1825,6 @@ assign:
 			cpu = env->best_cpu;
 		}
 
-		/*
-		 * Use select_idle_sibling if the previously found idle CPU is
-		 * not idle any more.
-		 */
-		if (!idle_cpu(cpu)) {
-			/*
-			 * select_idle_siblings() uses an per-CPU cpumask that
-			 * can be used from IRQ context.
-			 */
-			local_irq_disable();
-			cpu = select_idle_sibling(env->p, env->src_cpu,
-						  env->dst_cpu);
-			local_irq_enable();
-		}
-
 		env->dst_cpu = cpu;
 	}