From af449901b84c98cbd84a0113223ba3bcfcb12a26 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Sep 2020 10:38:30 +0200 Subject: sched: Add migrate_disable() Add the base migrate_disable() support (under protest). While migrate_disable() is (currently) required for PREEMPT_RT, it is also one of the biggest flaws in the system. Notably this is just the base implementation, it is broken vs sched_setaffinity() and hotplug, both solved in additional patches for ease of review. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102346.818170844@infradead.org --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 063cd120b459..0732356c0eca 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -714,6 +714,9 @@ struct task_struct { int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t cpus_mask; +#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) + int migration_disabled; +#endif #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; -- cgit From 6d337eab041d56bb8f0e7794f39906c21054c512 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Sep 2020 17:24:31 +0200 Subject: sched: Fix migrate_disable() vs set_cpus_allowed_ptr() Concurrent migrate_disable() and set_cpus_allowed_ptr() has interesting features. We rely on set_cpus_allowed_ptr() to not return until the task runs inside the provided mask. This expectation is exported to userspace. This means that any set_cpus_allowed_ptr() caller must wait until migrate_enable() allows migrations. At the same time, we don't want migrate_enable() to schedule, due to patterns like: preempt_disable(); migrate_disable(); ... migrate_enable(); preempt_enable(); And: raw_spin_lock(&B); spin_unlock(&A); this means that when migrate_enable() must restore the affinity mask, it cannot wait for completion thereof. Luck will have it that that is exactly the case where there is a pending set_cpus_allowed_ptr(), so let that provide storage for the async stop machine. Much thanks to Valentin who used TLA+ most effective and found lots of 'interesting' cases. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102346.921768277@infradead.org --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0732356c0eca..90a0c92741d7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -714,6 +714,7 @@ struct task_struct { int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t cpus_mask; + void *migration_pending; #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) int migration_disabled; #endif -- cgit From a7c81556ec4d341dfdbf2cc478ead89d73e474a7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Sep 2020 17:06:07 +0200 Subject: sched: Fix migrate_disable() vs rt/dl balancing In order to minimize the interference of migrate_disable() on lower priority tasks, which can be deprived of runtime due to being stuck below a higher priority task. Teach the RT/DL balancers to push away these higher priority tasks when a lower priority task gets selected to run on a freshly demoted CPU (pull). This adds migration interference to the higher priority task, but restores bandwidth to system that would otherwise be irrevocably lost. Without this it would be possible to have all tasks on the system stuck on a single CPU, each task preempted in a migrate_disable() section with a single high priority task running. This way we can still approximate running the M highest priority tasks on the system. Migrating the top task away is (ofcourse) still subject to migrate_disable() too, which means the lower task is subject to an interference equivalent to the worst case migrate_disable() section. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102347.499155098@infradead.org --- include/linux/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 90a0c92741d7..3af9d52fe093 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -716,8 +716,9 @@ struct task_struct { cpumask_t cpus_mask; void *migration_pending; #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) - int migration_disabled; + unsigned short migration_disabled; #endif + unsigned short migration_flags; #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; -- cgit From 74d862b682f51e45d25b95b1ecf212428a4967b0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Nov 2020 20:48:42 +0100 Subject: sched: Make migrate_disable/enable() independent of RT Now that the scheduler can deal with migrate disable properly, there is no real compelling reason to make it only available for RT. There are quite some code pathes which needlessly disable preemption in order to prevent migration and some constructs like kmap_atomic() enforce it implicitly. Making it available independent of RT allows to provide a preemptible variant of kmap_atomic() and makes the code more consistent in general. Signed-off-by: Thomas Gleixner Grudgingly-Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20201118204007.269943012@linutronix.de --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3af9d52fe093..a33f35f68060 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -715,7 +715,7 @@ struct task_struct { const cpumask_t *cpus_ptr; cpumask_t cpus_mask; void *migration_pending; -#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) +#ifdef CONFIG_SMP unsigned short migration_disabled; #endif unsigned short migration_flags; -- cgit