From 183f47fcaa54a5ffe671d990186d330ac8c63b10 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Thu, 18 Feb 2021 18:31:24 +0100
Subject: kcov: Remove kcov include from sched.h and move it to its users.

The recent addition of in_serving_softirq() to kcov.h results in a
compile failure on PREEMPT_RT because it requires
task_struct::softirq_disable_cnt. This is not available if kcov.h is
included from sched.h.

It is not needed to include kcov.h from sched.h. All but the net/ user
already include the kcov header file.

Move the include of the kcov.h header from sched.h to its users.
Additionally include sched.h from kcov.h to ensure that everything
task_struct related is available.

Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Ingo Molnar
Acked-by: Johannes Berg
Acked-by: Andrey Konovalov
Link: https://lkml.kernel.org/r/20210218173124.iy5iyqv3a4oia4vv@linutronix.de
---
 include/linux/kcov.h  | 1 +
 include/linux/sched.h | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index 4e3037dc1204..55dc338f6bcd 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_KCOV_H
 #define _LINUX_KCOV_H
 
+#include <linux/sched.h>
 #include <uapi/linux/kcov.h>
 
 struct task_struct;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ef00bb22164c..cf245bc237e7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -14,7 +14,6 @@
 #include <linux/pid.h>
 #include <linux/sem.h>
 #include <linux/shm.h>
-#include <linux/kcov.h>
 #include <linux/mutex.h>
 #include <linux/plist.h>
 #include <linux/hrtimer.h>
-- cgit

From e7fcd762282332f765af2035a9568fb126fa3c01 Mon Sep 17 00:00:00 2001
From: Chengming Zhou
Date: Wed, 3 Mar 2021 11:46:56 +0800
Subject: psi: Add PSI_CPU_FULL state

The FULL state doesn't exist for the CPU resource at the system level,
but it does exist at the cgroup level: it means that all non-idle tasks
in a cgroup are delayed on the CPU resource, which is either used by
others outside of the cgroup or throttled by the cgroup's cpu.max
configuration.

Co-developed-by: Muchun Song
Signed-off-by: Muchun Song
Signed-off-by: Chengming Zhou
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Ingo Molnar
Acked-by: Johannes Weiner
Link: https://lkml.kernel.org/r/20210303034659.91735-2-zhouchengming@bytedance.com
---
 include/linux/psi_types.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index b95f3211566a..0a23300d49af 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -50,9 +50,10 @@ enum psi_states {
 	PSI_MEM_SOME,
 	PSI_MEM_FULL,
 	PSI_CPU_SOME,
+	PSI_CPU_FULL,
 	/* Only per-CPU, to weigh the CPU in the global average: */
 	PSI_NONIDLE,
-	NR_PSI_STATES = 6,
+	NR_PSI_STATES = 7,
 };
 
 enum psi_aggregators {
-- cgit
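For illustration only (not part of the patch above): with PSI_CPU_FULL in
place, a cgroup's cpu.pressure file reports a "full" line in addition to
"some". A minimal userspace sketch that dumps it follows; the cgroup path
is an assumption and has to be adapted to the local cgroup v2 hierarchy.

#include <stdio.h>

int main(void)
{
	/* Hypothetical cgroup; any cgroup v2 directory with the cpu
	 * controller enabled exposes cpu.pressure. */
	const char *path = "/sys/fs/cgroup/mygroup/cpu.pressure";
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Expect a "some ..." line and, with this change, a "full ..." line. */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}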
From 7fae6c8171d20ac55402930ee8ae760cf85dff7b Mon Sep 17 00:00:00 2001
From: Chengming Zhou
Date: Wed, 3 Mar 2021 11:46:57 +0800
Subject: psi: Use ONCPU state tracking machinery to detect reclaim

Move the reclaim detection from the timer tick to the task state
tracking machinery using the recently added ONCPU state. Also check for
psi_flags changes of a task in the psi_task_switch() optimization so
that the parents are updated properly.

In terms of performance and cost, this ONCPU task state tracking is not
cheaper than the previous timer tick in aggregate. But the code is
simpler and shorter this way, so it's a maintainability win. Johannes
did some testing with perf bench; the performance and cost changes
would be acceptable for real workloads.

Thanks to Johannes Weiner for pointing out the psi_task_switch()
optimization detail and for the clearer changelog.

Co-developed-by: Muchun Song
Signed-off-by: Muchun Song
Signed-off-by: Chengming Zhou
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Ingo Molnar
Acked-by: Johannes Weiner
Link: https://lkml.kernel.org/r/20210303034659.91735-3-zhouchengming@bytedance.com
---
 include/linux/psi.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/psi.h b/include/linux/psi.h
index 7361023f3fdd..65eb1476ac70 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -20,7 +20,6 @@
 void psi_task_change(struct task_struct *task, int clear, int set);
 void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 		     bool sleep);
 
-void psi_memstall_tick(struct task_struct *task, int cpu);
 void psi_memstall_enter(unsigned long *flags);
 void psi_memstall_leave(unsigned long *flags);
-- cgit

From 90f093fa8ea48e5d991332cee160b761423d55c1 Mon Sep 17 00:00:00 2001
From: Piotr Figiel
Date: Fri, 26 Feb 2021 14:51:56 +0100
Subject: rseq, ptrace: Add PTRACE_GET_RSEQ_CONFIGURATION request

For userspace checkpoint and restore (C/R), a way of getting the process
state containing the RSEQ configuration is needed.

There are two ways this information is going to be used:
 - to re-enable RSEQ for threads which had it enabled before C/R
 - to detect if a thread was in a critical section during C/R

Since C/R preserves TLS memory and addresses, the RSEQ ABI will be
restored using the address registered before C/R.

Detecting whether the thread is in a critical section during C/R is
needed to enforce the behavior of RSEQ abort during C/R. Attaching with
ptrace() before the registers are dumped does not itself cause an RSEQ
abort. Restoring the instruction pointer within the critical section is
problematic because rseq_cs may get cleared before control is passed to
the migrated application code, leading to RSEQ invariants not being
preserved. The C/R code will use the RSEQ ABI address to find the abort
handler to which the instruction pointer needs to be set.

To achieve the above goals, expose the RSEQ ABI address and the
signature value with the new ptrace request
PTRACE_GET_RSEQ_CONFIGURATION.

This new ptrace request can also be used by debuggers so they are aware
of stops within restartable sequences in progress.

Signed-off-by: Piotr Figiel
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Thomas Gleixner
Reviewed-by: Michał Mirosław
Reviewed-by: Mathieu Desnoyers
Acked-by: Oleg Nesterov
Link: https://lkml.kernel.org/r/20210226135156.1081606-1-figiel@google.com
---
 include/uapi/linux/ptrace.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 83ee45fa634b..3747bf816f9a 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -102,6 +102,16 @@ struct ptrace_syscall_info {
 	};
 };
 
+#define PTRACE_GET_RSEQ_CONFIGURATION	0x420f
+
+struct ptrace_rseq_configuration {
+	__u64 rseq_abi_pointer;
+	__u32 rseq_abi_size;
+	__u32 signature;
+	__u32 flags;
+	__u32 pad;
+};
+
 /*
  * These values are stored in task->ptrace_message
  * by tracehook_report_syscall_* to describe the current syscall-stop.
-- cgit
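For illustration (not part of the patch): a tracer could query the rseq
registration of an already ptrace-stopped tracee roughly as below. This is
a fragment, error handling is minimal, and the structure definition is
repeated locally in case the installed uapi headers predate this change.

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <linux/types.h>

#ifndef PTRACE_GET_RSEQ_CONFIGURATION
#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f

/* Mirrors the uapi struct added above; with new enough headers it comes
 * from <linux/ptrace.h> instead. */
struct ptrace_rseq_configuration {
	__u64 rseq_abi_pointer;
	__u32 rseq_abi_size;
	__u32 signature;
	__u32 flags;
	__u32 pad;
};
#endif

/* Query the rseq registration of a tracee that is already ptrace-stopped. */
static int dump_rseq_config(pid_t pid)
{
	struct ptrace_rseq_configuration cfg;
	long ret;

	/* addr carries the buffer size, data the buffer pointer. */
	ret = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, pid, sizeof(cfg), &cfg);
	if (ret < 0) {
		perror("PTRACE_GET_RSEQ_CONFIGURATION");
		return -1;
	}

	printf("rseq abi: 0x%llx size: %u signature: 0x%x flags: 0x%x\n",
	       (unsigned long long)cfg.rseq_abi_pointer, cfg.rseq_abi_size,
	       cfg.signature, cfg.flags);
	return 0;
}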
From 3b03706fa621ce31a3e9ef6307020fde4e6aae16 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 18 Mar 2021 13:38:50 +0100
Subject: sched: Fix various typos

Fix ~42 single-word typos in scheduler code comments.

We have accumulated a few fun ones over the years. :-)

Signed-off-by: Ingo Molnar
Cc: Peter Zijlstra
Cc: Mike Galbraith
Cc: Juri Lelli
Cc: Vincent Guittot
Cc: Dietmar Eggemann
Cc: Steven Rostedt
Cc: Ben Segall
Cc: Mel Gorman
Cc: linux-kernel@vger.kernel.org
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index cf245bc237e7..05572e2140ad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1097,7 +1097,7 @@ struct task_struct {
 #ifdef CONFIG_CPUSETS
 	/* Protected by ->alloc_lock: */
 	nodemask_t			mems_allowed;
-	/* Seqence number to catch updates: */
+	/* Sequence number to catch updates: */
 	seqcount_spinlock_t		mems_allowed_seq;
 	int				cpuset_mem_spread_rotor;
 	int				cpuset_slab_spread_rotor;
-- cgit
From 4bad58ebc8bc4f20d89cff95417c9b4674769709 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 23 Mar 2021 22:05:39 +0100
Subject: signal: Allow tasks to cache one sigqueue struct

The idea for this originates from the real time tree, where it was added
to make signal delivery for realtime applications more efficient. In
quite some of these application scenarios a control task signals workers
to start their computations. There is usually only one signal per worker
in flight. This works nicely as long as the kmem cache allocations do not
hit the slow path and cause latencies.

To cure this, an optimistic caching was introduced (limited to RT tasks)
which allows a task to cache a single sigqueue in a pointer in
task_struct instead of handing it back to the kmem cache after consuming
a signal. When the next signal is sent to the task the cached sigqueue is
used instead of allocating a new one. This solved the problem for this
set of application scenarios nicely.

The task cache is not preallocated, so the first signal sent to a task
always goes to the kmem cache allocator. The cached sigqueue stays around
until the task exits and is freed when task::sighand is dropped.

After posting this solution for mainline the discussion came up whether
this would be useful in general and should not be limited to realtime
tasks: https://lore.kernel.org/r/m11rcu7nbr.fsf@fess.ebiederm.org

One concern leading to the original limitation was to avoid a large
amount of pointlessly cached sigqueues in alive tasks. The other concern
was vs. RLIMIT_SIGPENDING, as these cached sigqueues are not accounted
for.

The accounting problem is real, but on the other hand slightly academic.
After gathering some statistics it turned out that after boot of a
regular distro install there are less than 10 sigqueues cached in ~1500
tasks.

In case of a 'mass fork and fire signal to child' scenario the extra 80
bytes of memory per task are well in the noise of the overall memory
consumption of the fork bomb.

If this should be limited then it would need an extra counter in struct
user, more atomic instructions and a separate rlimit - yet another
tunable which would be mostly unused.

The caching is actually used. After boot and a full kernel compile on a
64 CPU machine with make -j128 the number of 'allocations' looks like
this:

  From slab:       23996
  From task cache: 52223

I.e. it reduces the number of slab cache operations by ~68%.

A typical pattern there is:

<...>-58490 __sigqueue_alloc:  for 58488 from slab ffff8881132df460
<...>-58488 __sigqueue_free:   cache ffff8881132df460
<...>-58488 __sigqueue_alloc:  for 1149 from cache ffff8881103dc550
 bash-1149  exit_task_sighand: free ffff8881132df460
 bash-1149  __sigqueue_free:   cache ffff8881103dc550

The interesting sequence is that the exiting task 58488 grabs the
sigqueue from bash's task cache to signal exit, and bash sticks it back
into its own cache. Lather, rinse and repeat.

The caching is probably not noticeable for the general use case, but the
benefit for latency sensitive applications is clear. While kmem caches
usually serve from the fast path, slab merging (the default) can,
depending on the usage pattern of the merged slabs, cause occasional slow
path allocations.

The time spared per cached entry is a few microseconds per signal, which
is not relevant for e.g. a kernel build, but for signal heavy workloads
it's measurable.

As there is no real downside to this caching mechanism, making it
unconditionally available is preferred over more conditional code or new
magic tunables.

Signed-off-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Oleg Nesterov
Link: https://lkml.kernel.org/r/87sg4lbmxo.fsf@nanos.tec.linutronix.de
---
 include/linux/sched.h  | 1 +
 include/linux/signal.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 05572e2140ad..f5ca798acb3a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -984,6 +984,7 @@ struct task_struct {
 	/* Signal handlers: */
 	struct signal_struct		*signal;
 	struct sighand_struct __rcu	*sighand;
+	struct sigqueue			*sigqueue_cache;
 	sigset_t			blocked;
 	sigset_t			real_blocked;
 	/* Restored if set_restore_sigmask() was used: */
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 205526c4003a..c3cbea266136 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -265,6 +265,7 @@ static inline void init_sigpending(struct sigpending *sig)
 }
 
 extern void flush_sigqueue(struct sigpending *queue);
+extern void exit_task_sigqueue_cache(struct task_struct *tsk);
 
 /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
 static inline int valid_signal(unsigned long sig)
-- cgit
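For reference, the caching idea amounts to roughly the sketch below. This
is a simplified illustration, not the exact mainline code: the field and
function names follow the patch, sigqueue_cachep is the existing slab
cache in kernel/signal.c, and the locking/accounting details of the real
implementation are left out.

static struct sigqueue *sigqueue_cache_get(struct task_struct *t)
{
	struct sigqueue *q = t->sigqueue_cache;

	/* Reuse the cached entry if there is one; otherwise the caller
	 * falls back to kmem_cache_alloc(sigqueue_cachep, ...). */
	if (q)
		t->sigqueue_cache = NULL;
	return q;
}

static bool sigqueue_cache_put(struct task_struct *t, struct sigqueue *q)
{
	/* Keep at most one entry per task; the caller frees the rest. */
	if (t->sigqueue_cache)
		return false;
	t->sigqueue_cache = q;
	return true;
}

/* Called when task::sighand is dropped at exit time. */
void exit_task_sigqueue_cache(struct task_struct *tsk)
{
	struct sigqueue *q = tsk->sigqueue_cache;

	if (q) {
		tsk->sigqueue_cache = NULL;
		kmem_cache_free(sigqueue_cachep, q);
	}
}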
From b02a4fd8148f655095d9e3d6eddd8f0042bcc27c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 25 Jan 2021 16:46:49 +0100
Subject: cpumask: Make cpu_{online,possible,present,active}() inline

Prepare for addition of another mask. Primarily a code movement to avoid
having to create more #ifdef, but while there, convert everything with an
argument to an inline function.

Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Valentin Schneider
Link: https://lkml.kernel.org/r/20210310150109.045447765@infradead.org
---
 include/linux/cpumask.h | 97 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 66 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 383684e30f12..a58433668bb2 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -98,37 +98,6 @@ extern struct cpumask __cpu_active_mask;
 
 extern atomic_t __num_online_cpus;
 
-#if NR_CPUS > 1
-/**
- * num_online_cpus() - Read the number of online CPUs
- *
- * Despite the fact that __num_online_cpus is of type atomic_t, this
- * interface gives only a momentary snapshot and is not protected against
- * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
- * region.
- */
-static inline unsigned int num_online_cpus(void)
-{
-	return atomic_read(&__num_online_cpus);
-}
-#define num_possible_cpus()	cpumask_weight(cpu_possible_mask)
-#define num_present_cpus()	cpumask_weight(cpu_present_mask)
-#define num_active_cpus()	cpumask_weight(cpu_active_mask)
-#define cpu_online(cpu)		cpumask_test_cpu((cpu), cpu_online_mask)
-#define cpu_possible(cpu)	cpumask_test_cpu((cpu), cpu_possible_mask)
-#define cpu_present(cpu)	cpumask_test_cpu((cpu), cpu_present_mask)
-#define cpu_active(cpu)		cpumask_test_cpu((cpu), cpu_active_mask)
-#else
-#define num_online_cpus()	1U
-#define num_possible_cpus()	1U
-#define num_present_cpus()	1U
-#define num_active_cpus()	1U
-#define cpu_online(cpu)		((cpu) == 0)
-#define cpu_possible(cpu)	((cpu) == 0)
-#define cpu_present(cpu)	((cpu) == 0)
-#define cpu_active(cpu)		((cpu) == 0)
-#endif
-
 extern cpumask_t cpus_booted_once_mask;
 
 static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
@@ -894,6 +863,72 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
 	return to_cpumask(p);
 }
 
+#if NR_CPUS > 1
+/**
+ * num_online_cpus() - Read the number of online CPUs
+ *
+ * Despite the fact that __num_online_cpus is of type atomic_t, this
+ * interface gives only a momentary snapshot and is not protected against
+ * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
+ * region.
+ */
+static inline unsigned int num_online_cpus(void)
+{
+	return atomic_read(&__num_online_cpus);
+}
+#define num_possible_cpus()	cpumask_weight(cpu_possible_mask)
+#define num_present_cpus()	cpumask_weight(cpu_present_mask)
+#define num_active_cpus()	cpumask_weight(cpu_active_mask)
+
+static inline bool cpu_online(unsigned int cpu)
+{
+	return cpumask_test_cpu(cpu, cpu_online_mask);
+}
+
+static inline bool cpu_possible(unsigned int cpu)
+{
+	return cpumask_test_cpu(cpu, cpu_possible_mask);
+}
+
+static inline bool cpu_present(unsigned int cpu)
+{
+	return cpumask_test_cpu(cpu, cpu_present_mask);
+}
+
+static inline bool cpu_active(unsigned int cpu)
+{
+	return cpumask_test_cpu(cpu, cpu_active_mask);
+}
+
+#else
+
+#define num_online_cpus()	1U
+#define num_possible_cpus()	1U
+#define num_present_cpus()	1U
+#define num_active_cpus()	1U
+
+static inline bool cpu_online(unsigned int cpu)
+{
+	return cpu == 0;
+}
+
+static inline bool cpu_possible(unsigned int cpu)
+{
+	return cpu == 0;
+}
+
+static inline bool cpu_present(unsigned int cpu)
+{
+	return cpu == 0;
+}
+
+static inline bool cpu_active(unsigned int cpu)
+{
+	return cpu == 0;
+}
+
+#endif /* NR_CPUS > 1 */
+
 #define cpu_is_offline(cpu)	unlikely(!cpu_online(cpu))
 
 #if NR_CPUS <= BITS_PER_LONG
-- cgit
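As a small usage illustration (a sketch, not taken from the patch): the
converted helpers keep their call sites unchanged, while the cpu argument
is now type checked through the inline functions, e.g.:

/* Illustrative only: count online CPUs by hand and compare with
 * num_online_cpus(); cpu_online() is now an inline function taking an
 * unsigned int. */
static void report_online_cpus(void)
{
	unsigned int cpu, n = 0;

	for_each_possible_cpu(cpu) {
		if (cpu_online(cpu))
			n++;
	}
	pr_info("online CPUs: %u (num_online_cpus(): %u)\n",
		n, num_online_cpus());
}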
From e40f74c535b8a0ecf3ef0388b51a34cdadb34fb5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 19 Jan 2021 18:43:45 +0100
Subject: cpumask: Introduce DYING mask

Introduce a cpumask that indicates (for each CPU) what direction the CPU
hotplug is currently going. Notably, it tracks rollbacks. E.g. when an up
fails and we do a roll-back down, it will accurately reflect the
direction.

Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Valentin Schneider
Link: https://lkml.kernel.org/r/20210310150109.151441252@infradead.org
---
 include/linux/cpumask.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index a58433668bb2..e6b948a6000d 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -91,10 +91,12 @@ extern struct cpumask __cpu_possible_mask;
 extern struct cpumask __cpu_online_mask;
 extern struct cpumask __cpu_present_mask;
 extern struct cpumask __cpu_active_mask;
+extern struct cpumask __cpu_dying_mask;
 
 #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
 #define cpu_online_mask   ((const struct cpumask *)&__cpu_online_mask)
 #define cpu_present_mask  ((const struct cpumask *)&__cpu_present_mask)
 #define cpu_active_mask   ((const struct cpumask *)&__cpu_active_mask)
+#define cpu_dying_mask    ((const struct cpumask *)&__cpu_dying_mask)
 
 extern atomic_t __num_online_cpus;
@@ -826,6 +828,14 @@ set_cpu_active(unsigned int cpu, bool active)
 		cpumask_clear_cpu(cpu, &__cpu_active_mask);
 }
 
+static inline void
+set_cpu_dying(unsigned int cpu, bool dying)
+{
+	if (dying)
+		cpumask_set_cpu(cpu, &__cpu_dying_mask);
+	else
+		cpumask_clear_cpu(cpu, &__cpu_dying_mask);
+}
 
 /**
  * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask
 *
@@ -900,6 +910,11 @@ static inline bool cpu_active(unsigned int cpu)
 	return cpumask_test_cpu(cpu, cpu_active_mask);
 }
 
+static inline bool cpu_dying(unsigned int cpu)
+{
+	return cpumask_test_cpu(cpu, cpu_dying_mask);
+}
+
 #else
 
 #define num_online_cpus()	1U
@@ -927,6 +942,11 @@ static inline bool cpu_active(unsigned int cpu)
 	return cpu == 0;
 }
 
+static inline bool cpu_dying(unsigned int cpu)
+{
+	return false;
+}
+
 #endif /* NR_CPUS > 1 */
 
 #define cpu_is_offline(cpu)	unlikely(!cpu_online(cpu))
-- cgit
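For illustration (a sketch, not from this patch), a placement decision
could consult the new mask to avoid CPUs that are on their way down; both
helper names below are hypothetical:

/* Hypothetical helpers: prefer CPUs that are online and not currently
 * being taken down.  cpu_dying() follows the hotplug direction, so it
 * also covers a failed bring-up that is being rolled back. */
static bool cpu_usable_for_new_work(unsigned int cpu)
{
	return cpu_online(cpu) && !cpu_dying(cpu);
}

static int pick_target_cpu(const struct cpumask *allowed)
{
	unsigned int cpu;

	for_each_cpu(cpu, allowed) {
		if (cpu_usable_for_new_work(cpu))
			return cpu;
	}
	return -1;	/* caller falls back to any allowed CPU */
}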
From 8a99b6833c884fa0e7919030d93fecedc69fc625 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 24 Mar 2021 11:43:21 +0100
Subject: sched: Move SCHED_DEBUG sysctl to debugfs

Stop polluting sysctl with undocumented knobs that really are debug only;
move them all to /debug/sched/, alongside the /debug/sched_* files that
already exist.

Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Greg Kroah-Hartman
Tested-by: Valentin Schneider
Link: https://lkml.kernel.org/r/20210412102001.287610138@infradead.org
---
 include/linux/sched/sysctl.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 3c31ba88aca5..0a3f34638cf5 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -26,10 +26,11 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
 enum { sysctl_hung_task_timeout_secs = 0 };
 #endif
 
+extern unsigned int sysctl_sched_child_runs_first;
+
 extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_child_runs_first;
 
 enum sched_tunable_scaling {
 	SCHED_TUNABLESCALING_NONE,
@@ -37,7 +38,7 @@ enum sched_tunable_scaling {
 	SCHED_TUNABLESCALING_LINEAR,
 	SCHED_TUNABLESCALING_END,
 };
-extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
+extern unsigned int sysctl_sched_tunable_scaling;
 
 extern unsigned int sysctl_numa_balancing_scan_delay;
 extern unsigned int sysctl_numa_balancing_scan_period_min;
@@ -47,9 +48,6 @@ extern unsigned int sysctl_numa_balancing_scan_size;
 #ifdef CONFIG_SCHED_DEBUG
 extern __read_mostly unsigned int sysctl_sched_migration_cost;
 extern __read_mostly unsigned int sysctl_sched_nr_migrate;
-
-int sched_proc_update_handler(struct ctl_table *table, int write,
-		void *buffer, size_t *length, loff_t *ppos);
 #endif
 
 /*
-- cgit

From 9af0440ec86ebdab075e1b3d231f81fe7decb575 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 25 Mar 2021 10:53:55 +0100
Subject: debugfs: Implement debugfs_create_str()

Implement debugfs_create_str() to easily display names and such in
debugfs.

Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Greg Kroah-Hartman
Tested-by: Valentin Schneider
Link: https://lkml.kernel.org/r/20210412102001.415407080@infradead.org
---
 include/linux/debugfs.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index d6c4cc9ecc77..1fdb4343af9c 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -128,6 +128,8 @@ void debugfs_create_atomic_t(const char *name, umode_t mode,
 			     struct dentry *parent, atomic_t *value);
 struct dentry *debugfs_create_bool(const char *name, umode_t mode,
 				   struct dentry *parent, bool *value);
+void debugfs_create_str(const char *name, umode_t mode,
+			struct dentry *parent, char **value);
 
 struct dentry *debugfs_create_blob(const char *name, umode_t mode,
 				   struct dentry *parent,
@@ -156,6 +158,9 @@ ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf,
 ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf,
 				size_t count, loff_t *ppos);
 
+ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf,
+			      size_t count, loff_t *ppos);
+
 #else
 
 #include <linux/err.h>
@@ -297,6 +302,11 @@ static inline struct dentry *debugfs_create_bool(const char *name, umode_t mode,
 	return ERR_PTR(-ENODEV);
 }
 
+static inline void debugfs_create_str(const char *name, umode_t mode,
+				      struct dentry *parent,
+				      char **value)
+{ }
+
 static inline struct dentry *debugfs_create_blob(const char *name, umode_t mode,
 				   struct dentry *parent,
 				   struct debugfs_blob_wrapper *blob)
@@ -348,6 +358,13 @@ static inline ssize_t debugfs_write_file_bool(struct file *file,
 	return -ENODEV;
 }
 
+static inline ssize_t debugfs_read_file_str(struct file *file,
+					    char __user *user_buf,
+					    size_t count, loff_t *ppos)
+{
+	return -ENODEV;
+}
+
 #endif
 
 /**
-- cgit
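As a usage sketch (illustrative, not from the patch): a driver that wants
to expose a string read-only through debugfs could use the new helper as
follows; the directory, file and variable names are made up for the
example:

#include <linux/debugfs.h>
#include <linux/module.h>

static char *example_name;	/* assumed to point to a kmalloc'd string */
static struct dentry *example_dir;

static int __init example_init(void)
{
	example_dir = debugfs_create_dir("example", NULL);
	/* 0444: read-only; reads go through debugfs_read_file_str(). */
	debugfs_create_str("name", 0444, example_dir, &example_name);
	return 0;
}

static void __exit example_exit(void)
{
	debugfs_remove(example_dir);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");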
From c006fac556e401a62054d065da168099ea5a5b10 Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Fri, 16 Apr 2021 14:29:36 -0700
Subject: sched: Warn on long periods of pending need_resched

The CPU scheduler marks the need_resched flag to signal a schedule() on a
particular CPU. But schedule() may not happen immediately in cases where
the current task is executing in kernel mode (with no preemption) for
extended periods of time.

This patch adds a warn_on if need_resched is pending for more than the
time specified in the resched_latency_warn_ms sysctl. If it goes off, it
is likely that there is a missing cond_resched() somewhere. Monitoring is
done via the tick and the accuracy is hence limited to jiffy scale. This
also means that we won't trigger the warning if the tick is disabled.

This feature (LATENCY_WARN) is disabled by default.

Signed-off-by: Paul Turner
Signed-off-by: Josh Don
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/20210416212936.390566-1-joshdon@google.com
---
 include/linux/sched/sysctl.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 0a3f34638cf5..db2c0f34aaaf 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -48,6 +48,9 @@ extern unsigned int sysctl_numa_balancing_scan_size;
 #ifdef CONFIG_SCHED_DEBUG
 extern __read_mostly unsigned int sysctl_sched_migration_cost;
 extern __read_mostly unsigned int sysctl_sched_nr_migrate;
+
+extern int sysctl_resched_latency_warn_ms;
+extern int sysctl_resched_latency_warn_once;
 #endif
 
 /*
-- cgit
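For illustration only, the tick-side check described above boils down to
something like the sketch below; the helper name and the
last_seen_need_resched field are invented for the example, and the
warn-once handling is omitted:

static void check_resched_latency(struct rq *rq)	/* illustrative sketch */
{
	unsigned long warn_after =
		msecs_to_jiffies(sysctl_resched_latency_warn_ms);

	if (!test_tsk_need_resched(rq->curr)) {
		rq->last_seen_need_resched = 0;	/* field name assumed */
		return;
	}

	if (!rq->last_seen_need_resched) {
		rq->last_seen_need_resched = jiffies;
		return;
	}

	if (time_after(jiffies, rq->last_seen_need_resched + warn_after))
		printk_deferred(KERN_WARNING
				"sched: need_resched pending for more than %d ms\n",
				sysctl_resched_latency_warn_ms);
}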