summaryrefslogtreecommitdiff
path: root/include/linux/sched.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-07-29 17:42:52 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-07-29 17:42:52 -0700
commitbf76f23aa1c178e9115eba17f699fa726aed669b (patch)
treea88270238eae8a0e6d98225c7de4b06bc2d2fb37 /include/linux/sched.h
parent14bed9bc81bae64db98349319f367bfc7dab0afd (diff)
parent1b5f1454091e9e9fb5c944b3161acf4ec0894d0d (diff)
Merge tag 'sched-core-2025-07-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: "Core scheduler changes: - Better tracking of maximum lag of tasks in presence of different slices duration, for better handling of lag in the fair scheduler (Vincent Guittot) - Clean up and standardize #if/#else/#endif markers throughout the entire scheduler code base (Ingo Molnar) - Make SMP unconditional: build the SMP scheduler's data structures and logic on UP kernel too, even though they are not used, to simplify the scheduler and remove around 200 #ifdef/[#else]/#endif blocks from the scheduler (Ingo Molnar) - Reorganize cgroup bandwidth control interface handling for better interfacing with sched_ext (Tejun Heo) Balancing: - Bump sd->max_newidle_lb_cost when newidle balance fails (Chris Mason) - Remove sched_domain_topology_level::flags to simplify the code (Prateek Nayak) - Simplify and clean up build_sched_topology() (Li Chen) - Optimize build_sched_topology() on large machines (Li Chen) Real-time scheduling: - Add initial version of proxy execution: a mechanism for mutex-owning tasks to inherit the scheduling context of higher priority waiters. Currently limited to a single runqueue and conditional on CONFIG_EXPERT, and other limitations (John Stultz, Peter Zijlstra, Valentin Schneider) - Deadline scheduler (Juri Lelli): - Fix dl_servers initialization order (Juri Lelli) - Fix DL scheduler's root domain reinitialization logic (Juri Lelli) - Fix accounting bugs after global limits change (Juri Lelli) - Fix scalability regression by implementing less agressive dl_server handling (Peter Zijlstra) PSI: - Improve scalability by optimizing psi_group_change() cpu_clock() usage (Peter Zijlstra) Rust changes: - Make Task, CondVar and PollCondVar methods inline to avoid unnecessary function calls (Kunwu Chan, Panagiotis Foliadis) - Add might_sleep() support for Rust code: Rust's "#[track_caller]" mechanism is used so that Rust's might_sleep() doesn't need to be defined as a macro (Fujita Tomonori) - Introduce file_from_location() (Boqun Feng) Debugging & instrumentation: - Make clangd usable with scheduler source code files again (Peter Zijlstra) - tools: Add root_domains_dump.py which dumps root domains info (Juri Lelli) - tools: Add dl_bw_dump.py for printing bandwidth accounting info (Juri Lelli) Misc cleanups & fixes: - Remove play_idle() (Feng Lee) - Fix check_preemption_disabled() (Sebastian Andrzej Siewior) - Do not call __put_task_struct() on RT if pi_blocked_on is set (Luis Claudio R. Goncalves) - Correct the comment in place_entity() (wang wei)" * tag 'sched-core-2025-07-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (84 commits) sched/idle: Remove play_idle() sched: Do not call __put_task_struct() on rt if pi_blocked_on is set sched: Start blocked_on chain processing in find_proxy_task() sched: Fix proxy/current (push,pull)ability sched: Add an initial sketch of the find_proxy_task() function sched: Fix runtime accounting w/ split exec & sched contexts sched: Move update_curr_task logic into update_curr_se locking/mutex: Add p->blocked_on wrappers for correctness checks locking/mutex: Rework task_struct::blocked_on sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable sched/topology: Remove sched_domain_topology_level::flags x86/smpboot: avoid SMT domain attach/destroy if SMT is not enabled x86/smpboot: moves x86_topology to static initialize and truncate x86/smpboot: remove redundant CONFIG_SCHED_SMT smpboot: introduce SDTL_INIT() helper to tidy sched topology setup tools/sched: Add dl_bw_dump.py for printing bandwidth accounting info tools/sched: Add root_domains_dump.py which dumps root domains info sched/deadline: Fix accounting after global limits change sched/deadline: Reset extra_bw to max_bw when clearing root domains sched/deadline: Initialize dl_servers after SMP ...
Diffstat (limited to 'include/linux/sched.h')
-rw-r--r--include/linux/sched.h148
1 files changed, 86 insertions, 62 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 63a9b6e23b07..40d2fa90df42 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -34,6 +34,7 @@
#include <linux/sched/prio.h>
#include <linux/sched/types.h>
#include <linux/signal_types.h>
+#include <linux/spinlock.h>
#include <linux/syscall_user_dispatch_types.h>
#include <linux/mm_types_task.h>
#include <linux/netdevice_xmit.h>
@@ -395,15 +396,10 @@ enum uclamp_id {
UCLAMP_CNT
};
-#ifdef CONFIG_SMP
extern struct root_domain def_root_domain;
extern struct mutex sched_domains_mutex;
extern void sched_domains_mutex_lock(void);
extern void sched_domains_mutex_unlock(void);
-#else
-static inline void sched_domains_mutex_lock(void) { }
-static inline void sched_domains_mutex_unlock(void) { }
-#endif
struct sched_param {
int sched_priority;
@@ -584,7 +580,15 @@ struct sched_entity {
u64 sum_exec_runtime;
u64 prev_sum_exec_runtime;
u64 vruntime;
- s64 vlag;
+ union {
+ /*
+ * When !@on_rq this field is vlag.
+ * When cfs_rq->curr == se (which implies @on_rq)
+ * this field is vprot. See protect_slice().
+ */
+ s64 vlag;
+ u64 vprot;
+ };
u64 slice;
u64 nr_migrations;
@@ -600,7 +604,6 @@ struct sched_entity {
unsigned long runnable_weight;
#endif
-#ifdef CONFIG_SMP
/*
* Per entity load average tracking.
*
@@ -608,7 +611,6 @@ struct sched_entity {
* collide with read-mostly values above.
*/
struct sched_avg avg;
-#endif
};
struct sched_rt_entity {
@@ -701,6 +703,7 @@ struct sched_dl_entity {
unsigned int dl_defer : 1;
unsigned int dl_defer_armed : 1;
unsigned int dl_defer_running : 1;
+ unsigned int dl_server_idle : 1;
/*
* Bandwidth enforcement timer. Each -deadline task has its
@@ -838,7 +841,6 @@ struct task_struct {
struct alloc_tag *alloc_tag;
#endif
-#ifdef CONFIG_SMP
int on_cpu;
struct __call_single_node wake_entry;
unsigned int wakee_flips;
@@ -854,7 +856,6 @@ struct task_struct {
*/
int recent_used_cpu;
int wake_cpu;
-#endif
int on_rq;
int prio;
@@ -913,9 +914,7 @@ struct task_struct {
cpumask_t *user_cpus_ptr;
cpumask_t cpus_mask;
void *migration_pending;
-#ifdef CONFIG_SMP
unsigned short migration_disabled;
-#endif
unsigned short migration_flags;
#ifdef CONFIG_PREEMPT_RCU
@@ -947,10 +946,8 @@ struct task_struct {
struct sched_info sched_info;
struct list_head tasks;
-#ifdef CONFIG_SMP
struct plist_node pushable_tasks;
struct rb_node pushable_dl_tasks;
-#endif
struct mm_struct *mm;
struct mm_struct *active_mm;
@@ -1234,10 +1231,7 @@ struct task_struct {
struct rt_mutex_waiter *pi_blocked_on;
#endif
-#ifdef CONFIG_DEBUG_MUTEXES
- /* Mutex deadlock detection: */
- struct mutex_waiter *blocked_on;
-#endif
+ struct mutex *blocked_on; /* lock we're blocked on */
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
/*
@@ -1662,6 +1656,19 @@ struct task_struct {
randomized_struct_fields_end
} __attribute__ ((aligned (64)));
+#ifdef CONFIG_SCHED_PROXY_EXEC
+DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec);
+static inline bool sched_proxy_exec(void)
+{
+ return static_branch_likely(&__sched_proxy_exec);
+}
+#else
+static inline bool sched_proxy_exec(void)
+{
+ return false;
+}
+#endif
+
#define TASK_REPORT_IDLE (TASK_REPORT + 1)
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
@@ -1776,12 +1783,8 @@ extern struct pid *cad_pid;
static __always_inline bool is_percpu_thread(void)
{
-#ifdef CONFIG_SMP
return (current->flags & PF_NO_SETAFFINITY) &&
(current->nr_cpus_allowed == 1);
-#else
- return true;
-#endif
}
/* Per-process atomic flags. */
@@ -1846,7 +1849,6 @@ extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpu
extern int task_can_attach(struct task_struct *p);
extern int dl_bw_alloc(int cpu, u64 dl_bw);
extern void dl_bw_free(int cpu, u64 dl_bw);
-#ifdef CONFIG_SMP
/* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
@@ -1864,33 +1866,6 @@ extern void release_user_cpus_ptr(struct task_struct *p);
extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask);
extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p);
-#else
-static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
-{
-}
-static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
-{
- /* Opencoded cpumask_test_cpu(0, new_mask) to avoid dependency on cpumask.h */
- if ((*cpumask_bits(new_mask) & 1) == 0)
- return -EINVAL;
- return 0;
-}
-static inline int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node)
-{
- if (src->user_cpus_ptr)
- return -EINVAL;
- return 0;
-}
-static inline void release_user_cpus_ptr(struct task_struct *p)
-{
- WARN_ON(p->user_cpus_ptr);
-}
-
-static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
-{
- return 0;
-}
-#endif
extern int yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
@@ -1979,11 +1954,7 @@ extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
extern void wake_up_new_task(struct task_struct *tsk);
-#ifdef CONFIG_SMP
extern void kick_process(struct task_struct *tsk);
-#else
-static inline void kick_process(struct task_struct *tsk) { }
-#endif
extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
#define set_task_comm(tsk, from) ({ \
@@ -2010,7 +1981,6 @@ extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec
buf; \
})
-#ifdef CONFIG_SMP
static __always_inline void scheduler_ipi(void)
{
/*
@@ -2020,9 +1990,6 @@ static __always_inline void scheduler_ipi(void)
*/
preempt_fold_need_resched();
}
-#else
-static inline void scheduler_ipi(void) { }
-#endif
extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);
@@ -2165,6 +2132,67 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock);
__cond_resched_rwlock_write(lock); \
})
+#ifndef CONFIG_PREEMPT_RT
+static inline struct mutex *__get_task_blocked_on(struct task_struct *p)
+{
+ struct mutex *m = p->blocked_on;
+
+ if (m)
+ lockdep_assert_held_once(&m->wait_lock);
+ return m;
+}
+
+static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
+{
+ WARN_ON_ONCE(!m);
+ /* The task should only be setting itself as blocked */
+ WARN_ON_ONCE(p != current);
+ /* Currently we serialize blocked_on under the mutex::wait_lock */
+ lockdep_assert_held_once(&m->wait_lock);
+ /*
+ * Check ensure we don't overwrite existing mutex value
+ * with a different mutex. Note, setting it to the same
+ * lock repeatedly is ok.
+ */
+ WARN_ON_ONCE(p->blocked_on && p->blocked_on != m);
+ p->blocked_on = m;
+}
+
+static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m)
+{
+ guard(raw_spinlock_irqsave)(&m->wait_lock);
+ __set_task_blocked_on(p, m);
+}
+
+static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m)
+{
+ WARN_ON_ONCE(!m);
+ /* Currently we serialize blocked_on under the mutex::wait_lock */
+ lockdep_assert_held_once(&m->wait_lock);
+ /*
+ * There may be cases where we re-clear already cleared
+ * blocked_on relationships, but make sure we are not
+ * clearing the relationship with a different lock.
+ */
+ WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m);
+ p->blocked_on = NULL;
+}
+
+static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m)
+{
+ guard(raw_spinlock_irqsave)(&m->wait_lock);
+ __clear_task_blocked_on(p, m);
+}
+#else
+static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
+{
+}
+
+static inline void clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
+{
+}
+#endif /* !CONFIG_PREEMPT_RT */
+
static __always_inline bool need_resched(void)
{
return unlikely(tif_need_resched());
@@ -2204,8 +2232,6 @@ extern bool sched_task_on_rq(struct task_struct *p);
extern unsigned long get_wchan(struct task_struct *p);
extern struct task_struct *cpu_curr_snapshot(int cpu);
-#include <linux/spinlock.h>
-
/*
* In order to reduce various lock holder preemption latencies provide an
* interface to see if a vCPU is currently running or not.
@@ -2228,7 +2254,6 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
#define TASK_SIZE_OF(tsk) TASK_SIZE
#endif
-#ifdef CONFIG_SMP
static inline bool owner_on_cpu(struct task_struct *owner)
{
/*
@@ -2240,7 +2265,6 @@ static inline bool owner_on_cpu(struct task_struct *owner)
/* Returns effective CPU energy utilization, as seen by the scheduler */
unsigned long sched_cpu_util(int cpu);
-#endif /* CONFIG_SMP */
#ifdef CONFIG_SCHED_CORE
extern void sched_core_free(struct task_struct *tsk);