From 1ea6c46a23f1213d1972bfae220db5c165e27bba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 6 May 2017 15:59:54 +0200 Subject: sched/fair: Propagate an effective runnable_load_avg The load balancer uses runnable_load_avg as load indicator. For !cgroup this is: runnable_load_avg = \Sum se->avg.load_avg ; where se->on_rq That is, a direct sum of all runnable tasks on that runqueue. As opposed to load_avg, which is a sum of all tasks on the runqueue, which includes a blocked component. However, in the cgroup case, this comes apart since the group entities are always runnable, even if most of their constituent entities are blocked. Therefore introduce a runnable_weight which for task entities is the same as the regular weight, but for group entities is a fraction of the entity weight and represents the runnable part of the group runqueue. Then propagate this load through the PELT hierarchy to arrive at an effective runnable load average -- which we should not confuse with the canonical runnable load average. 
Suggested-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a7df4e558c..bdd6ad6fcce1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -331,9 +331,11 @@ struct load_weight { struct sched_avg { u64 last_update_time; u64 load_sum; + u64 runnable_load_sum; u32 util_sum; u32 period_contrib; unsigned long load_avg; + unsigned long runnable_load_avg; unsigned long util_avg; }; @@ -376,6 +378,7 @@ struct sched_statistics { struct sched_entity { /* For load-balancing: */ struct load_weight load; + unsigned long runnable_weight; struct rb_node run_node; struct list_head group_node; unsigned int on_rq; -- cgit From 1d48b080bcce0a5e7d7aa2dbcdb35deefc188c3f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 Sep 2017 13:50:16 +0200 Subject: sched/debug: Rename task-state printing helpers Steve requested better names for the new task-state helper functions. So introduce the concept of task-state index for the printing and rename __get_task_state() to task_state_index() and __task_state_to_char() to task_index_to_char(). 
Requested-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170929115016.pzlqc7ss3ccystyg@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 +++--- include/trace/events/sched.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index bdd6ad6fcce1..33a01f4deb00 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1248,7 +1248,7 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) -static inline unsigned int __get_task_state(struct task_struct *tsk) +static inline unsigned int task_state_index(struct task_struct *tsk) { unsigned int tsk_state = READ_ONCE(tsk->state); unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; @@ -1261,7 +1261,7 @@ static inline unsigned int __get_task_state(struct task_struct *tsk) return fls(state); } -static inline char __task_state_to_char(unsigned int state) +static inline char task_index_to_char(unsigned int state) { static const char state_char[] = "RSDTtXZPI"; @@ -1272,7 +1272,7 @@ static inline char __task_state_to_char(unsigned int state) static inline char task_state_to_char(struct task_struct *tsk) { - return __task_state_to_char(__get_task_state(tsk)); + return task_index_to_char(task_state_index(tsk)); } /** diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 3c8b7f625670..fab74a12ca0f 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -117,7 +117,7 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * if (preempt) return TASK_STATE_MAX; - return __get_task_state(p); + return task_state_index(p); } #endif /* CREATE_TRACE_POINTS */ -- cgit From 
799ba82de01e7543f6b2042e1a739f3a20255f23 Mon Sep 17 00:00:00 2001 From: luca abeni Date: Thu, 7 Sep 2017 12:09:31 +0200 Subject: sched/deadline: Use C bitfields for the state flags Ask the compiler to use a single bit for storing true / false values, instead of wasting the size of a whole int value. Tested with gcc 5.4.0 on x86_64, and the compiler produces the expected Assembly (similar to the Assembly code generated when explicitly accessing the bits with bitmasks, "&" and "|"). Signed-off-by: luca abeni Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Bristot de Oliveira Cc: Juri Lelli Cc: Linus Torvalds Cc: Mathieu Poirier Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1504778971-13573-5-git-send-email-luca.abeni@santannapisa.it Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 33a01f4deb00..0f897dfc195e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -474,10 +474,10 @@ struct sched_dl_entity { * conditions between the inactive timer handler and the wakeup * code. */ - int dl_throttled; - int dl_boosted; - int dl_yielded; - int dl_non_contending; + int dl_throttled : 1; + int dl_boosted : 1; + int dl_yielded : 1; + int dl_non_contending : 1; /* * Bandwidth enforcement timer. Each -deadline task has its -- cgit From ff0d4a9dc16b1f4c954f6407c233ab848bdfe8b0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 4 Oct 2017 17:49:00 +0200 Subject: sched/rt: Add a helper to test for a RT task This helper returns true if a task has elevated priority which is true for RT tasks (SCHED_RR and SCHED_FIFO) and also for SCHED_DEADLINE. A task which runs at RT priority due to PI-boosting is not considered as one with elevated priority. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Cc: Jens Axboe Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171004154901.26904-1-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/sched/rt.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index f93329aba31a..133001627ba1 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -17,6 +17,17 @@ static inline int rt_task(struct task_struct *p) return rt_prio(p->prio); } +static inline bool task_is_realtime(struct task_struct *tsk) +{ + int policy = tsk->policy; + + if (policy == SCHED_FIFO || policy == SCHED_RR) + return true; + if (policy == SCHED_DEADLINE) + return true; + return false; +} + #ifdef CONFIG_RT_MUTEXES /* * Must hold either p->pi_lock or task_rq(p)->lock. -- cgit From 36436440cd19f59f5be12a1b181d299af2725140 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 4 Oct 2017 17:49:01 +0200 Subject: block/ioprio: Use a helper to check for RT prio A side-effect to the old code is that now SCHED_DEADLINE is also recognized. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Cc: Jens Axboe Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171004154901.26904-2-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/ioprio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 8c1239020d79..2f19aab84a4a 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -2,6 +2,7 @@ #define IOPRIO_H #include +#include #include /* @@ -62,7 +63,7 @@ static inline int task_nice_ioclass(struct task_struct *task) { if (task->policy == SCHED_IDLE) return IOPRIO_CLASS_IDLE; - else if (task->policy == SCHED_FIFO || task->policy == SCHED_RR) + else if (task_is_realtime(task)) return IOPRIO_CLASS_RT; else return IOPRIO_CLASS_BE; -- cgit From e22cdc3fc5991956146b9856d36b4971fe54dcd6 Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Mon, 23 Oct 2017 19:01:54 +0600 Subject: sched/isolcpus: Fix "isolcpus=" boot parameter handling when !CONFIG_CPUMASK_OFFSTACK cpulist_parse() uses nr_cpumask_bits as a limit to parse the passed buffer from the kernel command line. What nr_cpumask_bits represents varies depending upon the CONFIG_CPUMASK_OFFSTACK option: - If CONFIG_CPUMASK_OFFSTACK=n, then nr_cpumask_bits is the same as NR_CPUS, which might not represent the # of CPUs that really exist (default 64). So, there's a chance of a gap between nr_cpu_ids and NR_CPUS, which ultimately leads to an invalid cpulist_parse() operation. For example, if isolcpus=9 is passed on an 8 cpu system (CONFIG_CPUMASK_OFFSTACK=n) it doesn't show the error that it's supposed to. This patch fixes this bug by finding the last CPU of the passed isolcpus= list and checking it against nr_cpu_ids. It also fixes the error message where the nr_cpu_ids should be nr_cpu_ids-1, since CPU numbering starts from 0. 
Signed-off-by: Rakib Mullick Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: adobriyan@gmail.com Cc: akpm@linux-foundation.org Cc: longman@redhat.com Cc: mka@chromium.org Cc: tj@kernel.org Link: http://lkml.kernel.org/r/20171023130154.9050-1-rakib.mullick@gmail.com [ Enhanced the changelog and the kernel message. ] Signed-off-by: Ingo Molnar include/linux/cpumask.h | 16 ++++++++++++++++ kernel/sched/topology.c | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) --- include/linux/cpumask.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index cd415b733c2a..63661de67ad4 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -130,6 +130,11 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return 0; } +static inline unsigned int cpumask_last(const struct cpumask *srcp) +{ + return 0; +} + /* Valid inputs for n are -1 and 0. */ static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) { @@ -178,6 +183,17 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } +/** + * cpumask_last - get the last CPU in a cpumask + * @srcp: - the cpumask pointer + * + * Returns >= nr_cpumask_bits if no CPUs set. + */ +static inline unsigned int cpumask_last(const struct cpumask *srcp) +{ + return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits); +} + unsigned int cpumask_next(int n, const struct cpumask *srcp); /** -- cgit From 7863406143d8bbbbda07a61285c5f4c217908dfd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:28 +0200 Subject: sched/isolation: Move housekeeping related code to its own file The housekeeping code is currently tied to the NOHZ code. As we are planning to make housekeeping independent from it, start with moving the relevant code to its own file. 
Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Acked-by: Paul E. McKenney Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-2-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 56 +++++++++++++++++++++++++++++++++++++++++ include/linux/tick.h | 37 --------------------------- 2 files changed, 56 insertions(+), 37 deletions(-) create mode 100644 include/linux/sched/isolation.h (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h new file mode 100644 index 000000000000..b7cfbc46286c --- /dev/null +++ b/include/linux/sched/isolation.h @@ -0,0 +1,56 @@ +#ifndef _LINUX_SCHED_ISOLATION_H +#define _LINUX_SCHED_ISOLATION_H + +#include +#include +#include + +#ifdef CONFIG_NO_HZ_FULL +extern cpumask_var_t housekeeping_mask; + +static inline int housekeeping_any_cpu(void) +{ + return cpumask_any_and(housekeeping_mask, cpu_online_mask); +} + +extern void __init housekeeping_init(void); + +#else + +static inline int housekeeping_any_cpu(void) +{ + return smp_processor_id(); +} + +static inline void housekeeping_init(void) { } +#endif /* CONFIG_NO_HZ_FULL */ + + +static inline const struct cpumask *housekeeping_cpumask(void) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + return housekeeping_mask; +#endif + return cpu_possible_mask; +} + +static inline bool is_housekeeping_cpu(int cpu) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + return cpumask_test_cpu(cpu, housekeeping_mask); +#endif + return true; +} + +static inline void housekeeping_affine(struct task_struct *t) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + set_cpus_allowed_ptr(t, housekeeping_mask); + +#endif +} + +#endif /* _LINUX_SCHED_ISOLATION_H */ diff --git a/include/linux/tick.h b/include/linux/tick.h index 
fe01e68bf520..68afc09aa8ac 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -137,7 +137,6 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } #ifdef CONFIG_NO_HZ_FULL extern bool tick_nohz_full_running; extern cpumask_var_t tick_nohz_full_mask; -extern cpumask_var_t housekeeping_mask; static inline bool tick_nohz_full_enabled(void) { @@ -161,11 +160,6 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) cpumask_or(mask, mask, tick_nohz_full_mask); } -static inline int housekeeping_any_cpu(void) -{ - return cpumask_any_and(housekeeping_mask, cpu_online_mask); -} - extern void tick_nohz_dep_set(enum tick_dep_bits bit); extern void tick_nohz_dep_clear(enum tick_dep_bits bit); extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit); @@ -235,10 +229,6 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, extern void tick_nohz_full_kick_cpu(int cpu); extern void __tick_nohz_task_switch(void); #else -static inline int housekeeping_any_cpu(void) -{ - return smp_processor_id(); -} static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } @@ -260,33 +250,6 @@ static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void __tick_nohz_task_switch(void) { } #endif -static inline const struct cpumask *housekeeping_cpumask(void) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - return housekeeping_mask; -#endif - return cpu_possible_mask; -} - -static inline bool is_housekeeping_cpu(int cpu) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - return cpumask_test_cpu(cpu, housekeeping_mask); -#endif - return true; -} - -static inline void housekeeping_affine(struct task_struct *t) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - set_cpus_allowed_ptr(t, housekeeping_mask); - -#endif -} - static 
inline void tick_nohz_task_switch(void) { if (tick_nohz_full_enabled()) -- cgit From 9f0ca2d97ef0b5e966be2cfef26c7c094ec14e41 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:30 +0200 Subject: sched/isolation: Provide a dynamic off-case to housekeeping_any_cpu() housekeeping_any_cpu() doesn't correctly handle the case where CONFIG_NO_HZ_FULL=y and no CPU is in nohz_full mode. So far no caller needs this but let's prepare to avoid any future surprise. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-4-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index b7cfbc46286c..040df04fa78a 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -7,25 +7,20 @@ #ifdef CONFIG_NO_HZ_FULL extern cpumask_var_t housekeeping_mask; - -static inline int housekeeping_any_cpu(void) -{ - return cpumask_any_and(housekeeping_mask, cpu_online_mask); -} - extern void __init housekeeping_init(void); - #else +static inline void housekeeping_init(void) { } +#endif /* CONFIG_NO_HZ_FULL */ static inline int housekeeping_any_cpu(void) { +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + return cpumask_any_and(housekeeping_mask, cpu_online_mask); +#endif return smp_processor_id(); } -static inline void housekeeping_init(void) { } -#endif /* CONFIG_NO_HZ_FULL */ - - static inline const struct cpumask *housekeeping_cpumask(void) { #ifdef CONFIG_NO_HZ_FULL -- cgit From 7e56a1cf4b28f5739526877b8dbad623fae2e4e7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 
04:42:31 +0200 Subject: sched/isolation: Make the housekeeping cpumask private Nobody needs to access this detail. housekeeping_cpumask() already takes care of it. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-5-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 040df04fa78a..ed935ffc6ffa 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -6,46 +6,35 @@ #include #ifdef CONFIG_NO_HZ_FULL -extern cpumask_var_t housekeeping_mask; +extern int housekeeping_any_cpu(void); +extern const struct cpumask *housekeeping_cpumask(void); +extern void housekeeping_affine(struct task_struct *t); +extern bool housekeeping_test_cpu(int cpu); extern void __init housekeeping_init(void); + #else -static inline void housekeeping_init(void) { } -#endif /* CONFIG_NO_HZ_FULL */ static inline int housekeeping_any_cpu(void) { -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - return cpumask_any_and(housekeeping_mask, cpu_online_mask); -#endif return smp_processor_id(); } static inline const struct cpumask *housekeeping_cpumask(void) { -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - return housekeeping_mask; -#endif return cpu_possible_mask; } +static inline void housekeeping_affine(struct task_struct *t) { } +static inline void housekeeping_init(void) { } +#endif /* CONFIG_NO_HZ_FULL */ + static inline bool is_housekeeping_cpu(int cpu) { #ifdef CONFIG_NO_HZ_FULL if (tick_nohz_full_enabled()) - return cpumask_test_cpu(cpu, housekeeping_mask); + return 
housekeeping_test_cpu(cpu); #endif return true; } -static inline void housekeeping_affine(struct task_struct *t) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - set_cpus_allowed_ptr(t, housekeeping_mask); - -#endif -} - #endif /* _LINUX_SCHED_ISOLATION_H */ -- cgit From e179f5a04ba46ee5c5439480c2bfd68c358168b7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:32 +0200 Subject: sched/isolation: Use its own static key Housekeeping code still depends on the nohz_full static key. Since we want to decouple housekeeping from NOHZ, let's create a housekeeping specific static key. It's mostly relevant for calls to is_housekeeping_cpu() from the scheduler. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-6-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index ed935ffc6ffa..194c586fbb12 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -6,6 +6,7 @@ #include #ifdef CONFIG_NO_HZ_FULL +DECLARE_STATIC_KEY_FALSE(housekeeping_overriden); extern int housekeeping_any_cpu(void); extern const struct cpumask *housekeeping_cpumask(void); extern void housekeeping_affine(struct task_struct *t); @@ -31,7 +32,7 @@ static inline void housekeeping_init(void) { } static inline bool is_housekeeping_cpu(int cpu) { #ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) + if (static_branch_unlikely(&housekeeping_overriden)) return housekeeping_test_cpu(cpu); #endif return true; -- cgit From 204c083a009378dfa751175b5fcddc75988bab6c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: 
Fri, 27 Oct 2017 04:42:33 +0200 Subject: sched/isolation: Rename is_housekeeping_cpu() to housekeeping_cpu() Fit it into the housekeeping_*() namespace. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-7-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 194c586fbb12..ad0f5d986a2e 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -29,7 +29,7 @@ static inline void housekeeping_affine(struct task_struct *t) { } static inline void housekeeping_init(void) { } #endif /* CONFIG_NO_HZ_FULL */ -static inline bool is_housekeeping_cpu(int cpu) +static inline bool housekeeping_cpu(int cpu) { #ifdef CONFIG_NO_HZ_FULL if (static_branch_unlikely(&housekeeping_overriden)) -- cgit From 5c4991e24c69737bd41fc2737b1e3980abbf73f9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:34 +0200 Subject: sched/isolation: Split out new CONFIG_CPU_ISOLATION=y config from CONFIG_NO_HZ_FULL Split the housekeeping config from CONFIG_NO_HZ_FULL. This way we finally separate the isolation code from NOHZ. Although a dependency to CONFIG_NO_HZ_FULL remains for now, while the housekeeping code still deals with NOHZ internals. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-8-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index ad0f5d986a2e..93ac2367a520 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -5,7 +5,7 @@ #include #include -#ifdef CONFIG_NO_HZ_FULL +#ifdef CONFIG_CPU_ISOLATION DECLARE_STATIC_KEY_FALSE(housekeeping_overriden); extern int housekeeping_any_cpu(void); extern const struct cpumask *housekeeping_cpumask(void); @@ -27,11 +27,11 @@ static inline const struct cpumask *housekeeping_cpumask(void) static inline void housekeeping_affine(struct task_struct *t) { } static inline void housekeeping_init(void) { } -#endif /* CONFIG_NO_HZ_FULL */ +#endif /* CONFIG_CPU_ISOLATION */ static inline bool housekeeping_cpu(int cpu) { -#ifdef CONFIG_NO_HZ_FULL +#ifdef CONFIG_CPU_ISOLATION if (static_branch_unlikely(&housekeeping_overriden)) return housekeeping_test_cpu(cpu); #endif -- cgit From de201559df872f83d0c08fb4effe3efd28e6cbc8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:35 +0200 Subject: sched/isolation: Introduce housekeeping flags Before we implement isolcpus under housekeeping, we need the isolation features to be more fine-grained. For example some people want NOHZ_FULL without the full scheduler isolation, others want full scheduler isolation without NOHZ_FULL. So let's cut all these isolation features piecewise, at the risk of overcutting it right now. We can still merge some flags later if they always make sense together. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-9-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 93ac2367a520..9bb753eece3b 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -5,35 +5,43 @@ #include #include +enum hk_flags { + HK_FLAG_TIMER = 1, + HK_FLAG_RCU = (1 << 1), + HK_FLAG_MISC = (1 << 2), + HK_FLAG_SCHED = (1 << 3), +}; + #ifdef CONFIG_CPU_ISOLATION DECLARE_STATIC_KEY_FALSE(housekeeping_overriden); -extern int housekeeping_any_cpu(void); -extern const struct cpumask *housekeeping_cpumask(void); -extern void housekeeping_affine(struct task_struct *t); -extern bool housekeeping_test_cpu(int cpu); +extern int housekeeping_any_cpu(enum hk_flags flags); +extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags); +extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags); +extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags); extern void __init housekeeping_init(void); #else -static inline int housekeeping_any_cpu(void) +static inline int housekeeping_any_cpu(enum hk_flags flags) { return smp_processor_id(); } -static inline const struct cpumask *housekeeping_cpumask(void) +static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags) { return cpu_possible_mask; } -static inline void housekeeping_affine(struct task_struct *t) { } +static inline void housekeeping_affine(struct task_struct *t, + enum hk_flags flags) { } static inline void housekeeping_init(void) { } #endif /* CONFIG_CPU_ISOLATION */ -static inline bool housekeeping_cpu(int cpu) +static inline bool housekeeping_cpu(int cpu, enum hk_flags flags) { #ifdef CONFIG_CPU_ISOLATION if 
(static_branch_unlikely(&housekeeping_overriden)) - return housekeeping_test_cpu(cpu); + return housekeeping_test_cpu(cpu, flags); #endif return true; } -- cgit From 6f1982fedd59856bcc42a9b521be4c3ffd2f60a7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:36 +0200 Subject: sched/isolation: Handle the nohz_full= parameter We want to centralize the isolation management, done by the housekeeping subsystem. Therefore we need to handle the nohz_full= parameter from there. Since nohz_full= so far has involved unbound timers, watchdog, RCU and tilegx NAPI isolation, we keep that default behaviour. nohz_full= will be deprecated in the future. We want to control the isolation features from the isolcpus= parameter. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-10-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 1 + include/linux/tick.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 9bb753eece3b..e53cfa96e91e 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -10,6 +10,7 @@ enum hk_flags { HK_FLAG_RCU = (1 << 1), HK_FLAG_MISC = (1 << 2), HK_FLAG_SCHED = (1 << 3), + HK_FLAG_TICK = (1 << 4), }; #ifdef CONFIG_CPU_ISOLATION diff --git a/include/linux/tick.h b/include/linux/tick.h index 68afc09aa8ac..e2a163a9f96c 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -228,6 +228,7 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, extern void tick_nohz_full_kick_cpu(int cpu); extern void __tick_nohz_task_switch(void); +extern void __init tick_nohz_full_setup(cpumask_var_t cpumask); #else static inline bool 
tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } @@ -248,6 +249,7 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void __tick_nohz_task_switch(void) { } +static inline void tick_nohz_full_setup(cpumask_var_t cpumask) { } #endif static inline void tick_nohz_task_switch(void) -- cgit From edb9382175c3ebdced8ffdb3e0f20052ad9fdbe9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:37 +0200 Subject: sched/isolation: Move isolcpus= handling to the housekeeping code We want to centralize the isolation features, to be done by the housekeeping subsystem and scheduler domain isolation is a significant part of it. No intended behaviour change, we just reuse the housekeeping cpumask and core code. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-11-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- include/linux/sched/isolation.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0f897dfc195e..1b0cc0d6df8d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -165,8 +165,6 @@ struct task_group; /* Task command name length: */ #define TASK_COMM_LEN 16 -extern cpumask_var_t cpu_isolated_map; - extern void scheduler_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index e53cfa96e91e..d849431c8060 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -11,6 +11,7 @@ enum hk_flags { HK_FLAG_MISC = (1 << 2), HK_FLAG_SCHED = (1 << 3), HK_FLAG_TICK = (1 << 4), + HK_FLAG_DOMAIN = (1 << 5), }; #ifdef CONFIG_CPU_ISOLATION -- cgit From a9903f04e0a4ea522d959c2f287cdf0ab029e324 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 30 Oct 2017 11:08:16 -0700 Subject: sched/sysctl: Fix attributes of some extern declarations The definition of sysctl_sched_migration_cost, sysctl_sched_nr_migrate and sysctl_sched_time_avg includes the attribute const_debug. 
This attribute is not part of the extern declaration of these variables in include/linux/sched/sysctl.h, while it is in kernel/sched/sched.h, and as a result Clang generates warnings like this: kernel/sched/sched.h:1618:33: warning: section attribute is specified on redeclared variable [-Wsection] extern const_debug unsigned int sysctl_sched_time_avg; ^ ./include/linux/sched/sysctl.h:42:21: note: previous declaration is here extern unsigned int sysctl_sched_time_avg; The header only declares the variables when CONFIG_SCHED_DEBUG is defined, therefore it is not necessary to duplicate the definition of const_debug. Instead we can use the attribute __read_mostly, which is the expansion of const_debug when CONFIG_SCHED_DEBUG=y is set. Signed-off-by: Matthias Kaehlcke Reviewed-by: Nick Desaulniers Cc: Douglas Anderson Cc: Guenter Roeck Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Shile Zhang Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171030180816.170850-1-mka@chromium.org Signed-off-by: Ingo Molnar --- include/linux/sched/sysctl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 0f5ecd4d298e..d34c823f3d36 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -37,9 +37,9 @@ extern unsigned int sysctl_numa_balancing_scan_period_max; extern unsigned int sysctl_numa_balancing_scan_size; #ifdef CONFIG_SCHED_DEBUG -extern unsigned int sysctl_sched_migration_cost; -extern unsigned int sysctl_sched_nr_migrate; -extern unsigned int sysctl_sched_time_avg; +extern __read_mostly unsigned int sysctl_sched_migration_cost; +extern __read_mostly unsigned int sysctl_sched_nr_migrate; +extern __read_mostly unsigned int sysctl_sched_time_avg; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, -- cgit