summaryrefslogtreecommitdiff
path: root/kernel/sched/rt.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-07-29 17:42:52 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-07-29 17:42:52 -0700
commitbf76f23aa1c178e9115eba17f699fa726aed669b (patch)
treea88270238eae8a0e6d98225c7de4b06bc2d2fb37 /kernel/sched/rt.c
parent14bed9bc81bae64db98349319f367bfc7dab0afd (diff)
parent1b5f1454091e9e9fb5c944b3161acf4ec0894d0d (diff)
Merge tag 'sched-core-2025-07-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: "Core scheduler changes: - Better tracking of maximum lag of tasks in presence of different slices duration, for better handling of lag in the fair scheduler (Vincent Guittot) - Clean up and standardize #if/#else/#endif markers throughout the entire scheduler code base (Ingo Molnar) - Make SMP unconditional: build the SMP scheduler's data structures and logic on UP kernel too, even though they are not used, to simplify the scheduler and remove around 200 #ifdef/[#else]/#endif blocks from the scheduler (Ingo Molnar) - Reorganize cgroup bandwidth control interface handling for better interfacing with sched_ext (Tejun Heo) Balancing: - Bump sd->max_newidle_lb_cost when newidle balance fails (Chris Mason) - Remove sched_domain_topology_level::flags to simplify the code (Prateek Nayak) - Simplify and clean up build_sched_topology() (Li Chen) - Optimize build_sched_topology() on large machines (Li Chen) Real-time scheduling: - Add initial version of proxy execution: a mechanism for mutex-owning tasks to inherit the scheduling context of higher priority waiters. Currently limited to a single runqueue and conditional on CONFIG_EXPERT, and other limitations (John Stultz, Peter Zijlstra, Valentin Schneider) - Deadline scheduler (Juri Lelli): - Fix dl_servers initialization order (Juri Lelli) - Fix DL scheduler's root domain reinitialization logic (Juri Lelli) - Fix accounting bugs after global limits change (Juri Lelli) - Fix scalability regression by implementing less agressive dl_server handling (Peter Zijlstra) PSI: - Improve scalability by optimizing psi_group_change() cpu_clock() usage (Peter Zijlstra) Rust changes: - Make Task, CondVar and PollCondVar methods inline to avoid unnecessary function calls (Kunwu Chan, Panagiotis Foliadis) - Add might_sleep() support for Rust code: Rust's "#[track_caller]" mechanism is used so that Rust's might_sleep() doesn't need to be defined as a macro (Fujita Tomonori) - Introduce file_from_location() (Boqun Feng) Debugging & instrumentation: - Make clangd usable with scheduler source code files again (Peter Zijlstra) - tools: Add root_domains_dump.py which dumps root domains info (Juri Lelli) - tools: Add dl_bw_dump.py for printing bandwidth accounting info (Juri Lelli) Misc cleanups & fixes: - Remove play_idle() (Feng Lee) - Fix check_preemption_disabled() (Sebastian Andrzej Siewior) - Do not call __put_task_struct() on RT if pi_blocked_on is set (Luis Claudio R. Goncalves) - Correct the comment in place_entity() (wang wei)" * tag 'sched-core-2025-07-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (84 commits) sched/idle: Remove play_idle() sched: Do not call __put_task_struct() on rt if pi_blocked_on is set sched: Start blocked_on chain processing in find_proxy_task() sched: Fix proxy/current (push,pull)ability sched: Add an initial sketch of the find_proxy_task() function sched: Fix runtime accounting w/ split exec & sched contexts sched: Move update_curr_task logic into update_curr_se locking/mutex: Add p->blocked_on wrappers for correctness checks locking/mutex: Rework task_struct::blocked_on sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable sched/topology: Remove sched_domain_topology_level::flags x86/smpboot: avoid SMT domain attach/destroy if SMT is not enabled x86/smpboot: moves x86_topology to static initialize and truncate x86/smpboot: remove redundant CONFIG_SCHED_SMT smpboot: introduce SDTL_INIT() helper to tidy sched topology setup tools/sched: Add dl_bw_dump.py for printing bandwidth accounting info tools/sched: Add root_domains_dump.py which dumps root domains info sched/deadline: Fix accounting after global limits change sched/deadline: Reset extra_bw to max_bw when clearing root domains sched/deadline: Initialize dl_servers after SMP ...
Diffstat (limited to 'kernel/sched/rt.c')
-rw-r--r--kernel/sched/rt.c112
1 files changed, 29 insertions, 83 deletions
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index e40422c37033..7936d4333731 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -4,6 +4,9 @@
* policies)
*/
+#include "sched.h"
+#include "pelt.h"
+
int sched_rr_timeslice = RR_TIMESLICE;
/* More than 4 hours if BW_SHIFT equals 20. */
static const u64 max_rt_runtime = MAX_BW;
@@ -60,7 +63,7 @@ static int __init sched_rt_sysctl_init(void)
return 0;
}
late_initcall(sched_rt_sysctl_init);
-#endif
+#endif /* CONFIG_SYSCTL */
void init_rt_rq(struct rt_rq *rt_rq)
{
@@ -75,12 +78,10 @@ void init_rt_rq(struct rt_rq *rt_rq)
/* delimiter for bitsearch: */
__set_bit(MAX_RT_PRIO, array->bitmap);
-#if defined CONFIG_SMP
rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
rt_rq->highest_prio.next = MAX_RT_PRIO-1;
rt_rq->overloaded = 0;
plist_head_init(&rt_rq->pushable_tasks);
-#endif /* CONFIG_SMP */
/* We start is dequeued state, because no RT tasks are queued */
rt_rq->rt_queued = 0;
@@ -291,7 +292,7 @@ err:
return 0;
}
-#else /* CONFIG_RT_GROUP_SCHED */
+#else /* !CONFIG_RT_GROUP_SCHED: */
#define rt_entity_is_task(rt_se) (1)
@@ -327,9 +328,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
return 1;
}
-#endif /* CONFIG_RT_GROUP_SCHED */
-
-#ifdef CONFIG_SMP
+#endif /* !CONFIG_RT_GROUP_SCHED */
static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
@@ -430,21 +429,6 @@ static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
}
}
-#else
-
-static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
-{
-}
-
-static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
-{
-}
-
-static inline void rt_queue_push_tasks(struct rq *rq)
-{
-}
-#endif /* CONFIG_SMP */
-
static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);
@@ -485,12 +469,12 @@ static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
return cpu_cap >= min(min_cap, max_cap);
}
-#else
+#else /* !CONFIG_UCLAMP_TASK: */
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
return true;
}
-#endif
+#endif /* !CONFIG_UCLAMP_TASK */
#ifdef CONFIG_RT_GROUP_SCHED
@@ -594,17 +578,10 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
return p->prio != p->normal_prio;
}
-#ifdef CONFIG_SMP
static inline const struct cpumask *sched_rt_period_mask(void)
{
return this_rq()->rd->span;
}
-#else
-static inline const struct cpumask *sched_rt_period_mask(void)
-{
- return cpu_online_mask;
-}
-#endif
static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
@@ -625,7 +602,6 @@ bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
rt_rq->rt_time < rt_b->rt_runtime);
}
-#ifdef CONFIG_SMP
/*
* We ran out of runtime, see if we can borrow some from our neighbours.
*/
@@ -798,9 +774,6 @@ static void balance_runtime(struct rt_rq *rt_rq)
raw_spin_lock(&rt_rq->rt_runtime_lock);
}
}
-#else /* !CONFIG_SMP */
-static inline void balance_runtime(struct rt_rq *rt_rq) {}
-#endif /* CONFIG_SMP */
static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
{
@@ -930,7 +903,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
return 0;
}
-#else /* !CONFIG_RT_GROUP_SCHED */
+#else /* !CONFIG_RT_GROUP_SCHED: */
typedef struct rt_rq *rt_rq_iter_t;
@@ -977,12 +950,10 @@ struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
return &cpu_rq(cpu)->rt;
}
-#ifdef CONFIG_SMP
static void __enable_runtime(struct rq *rq) { }
static void __disable_runtime(struct rq *rq) { }
-#endif
-#endif /* CONFIG_RT_GROUP_SCHED */
+#endif /* !CONFIG_RT_GROUP_SCHED */
static inline int rt_se_prio(struct sched_rt_entity *rt_se)
{
@@ -1033,7 +1004,7 @@ static void update_curr_rt(struct rq *rq)
do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq));
}
}
-#endif
+#endif /* CONFIG_RT_GROUP_SCHED */
}
static void
@@ -1075,8 +1046,6 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
cpufreq_update_util(rq, 0);
}
-#if defined CONFIG_SMP
-
static void
inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
@@ -1107,16 +1076,6 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
}
-#else /* CONFIG_SMP */
-
-static inline
-void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
-static inline
-void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
-
-#endif /* CONFIG_SMP */
-
-#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
static void
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
@@ -1155,13 +1114,6 @@ dec_rt_prio(struct rt_rq *rt_rq, int prio)
dec_rt_prio_smp(rt_rq, prio, prev_prio);
}
-#else
-
-static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
-static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
-
-#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
-
#ifdef CONFIG_RT_GROUP_SCHED
static void
@@ -1182,7 +1134,7 @@ dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
}
-#else /* CONFIG_RT_GROUP_SCHED */
+#else /* !CONFIG_RT_GROUP_SCHED: */
static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
@@ -1192,7 +1144,7 @@ inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
static inline
void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
-#endif /* CONFIG_RT_GROUP_SCHED */
+#endif /* !CONFIG_RT_GROUP_SCHED */
static inline
unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
@@ -1488,6 +1440,9 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
enqueue_rt_entity(rt_se, flags);
+ if (task_is_blocked(p))
+ return;
+
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
enqueue_pushable_task(rq, p);
}
@@ -1538,7 +1493,6 @@ static void yield_task_rt(struct rq *rq)
requeue_task_rt(rq, rq->curr, 0);
}
-#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);
static int
@@ -1653,7 +1607,6 @@ static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
}
-#endif /* CONFIG_SMP */
/*
* Preempt the current task with a newly woken task if needed:
@@ -1667,7 +1620,6 @@ static void wakeup_preempt_rt(struct rq *rq, struct task_struct *p, int flags)
return;
}
-#ifdef CONFIG_SMP
/*
* If:
*
@@ -1682,7 +1634,6 @@ static void wakeup_preempt_rt(struct rq *rq, struct task_struct *p, int flags)
*/
if (p->prio == donor->prio && !test_tsk_need_resched(rq->curr))
check_preempt_equal_prio(rq, p);
-#endif
}
static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
@@ -1768,6 +1719,8 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct task_s
update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
+ if (task_is_blocked(p))
+ return;
/*
* The previous task needs to be made eligible for pushing
* if it is still active
@@ -1776,8 +1729,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct task_s
enqueue_pushable_task(rq, p);
}
-#ifdef CONFIG_SMP
-
/* Only try algorithms three times */
#define RT_MAX_TRIES 3
@@ -2451,7 +2402,6 @@ void __init init_sched_rt_class(void)
GFP_KERNEL, cpu_to_node(i));
}
}
-#endif /* CONFIG_SMP */
/*
* When switching a task to RT, we may overload the runqueue
@@ -2475,10 +2425,8 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
* then see if we can move to another run queue.
*/
if (task_on_rq_queued(p)) {
-#ifdef CONFIG_SMP
if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
rt_queue_push_tasks(rq);
-#endif /* CONFIG_SMP */
if (p->prio < rq->donor->prio && cpu_online(cpu_of(rq)))
resched_curr(rq);
}
@@ -2495,7 +2443,6 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
return;
if (task_current_donor(rq, p)) {
-#ifdef CONFIG_SMP
/*
* If our priority decreases while running, we
* may need to pull tasks to this runqueue.
@@ -2509,11 +2456,6 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
*/
if (p->prio > rq->rt.highest_prio.curr)
resched_curr(rq);
-#else
- /* For UP simply resched on drop of prio */
- if (oldprio < p->prio)
- resched_curr(rq);
-#endif /* CONFIG_SMP */
} else {
/*
* This task is not running, but if it is
@@ -2549,9 +2491,9 @@ static void watchdog(struct rq *rq, struct task_struct *p)
}
}
}
-#else
+#else /* !CONFIG_POSIX_TIMERS: */
static inline void watchdog(struct rq *rq, struct task_struct *p) { }
-#endif
+#endif /* !CONFIG_POSIX_TIMERS */
/*
* scheduler tick hitting a task of our scheduling class.
@@ -2620,7 +2562,7 @@ static int task_is_throttled_rt(struct task_struct *p, int cpu)
return rt_rq_throttled(rt_rq);
}
-#endif
+#endif /* CONFIG_SCHED_CORE */
DEFINE_SCHED_CLASS(rt) = {
@@ -2634,7 +2576,6 @@ DEFINE_SCHED_CLASS(rt) = {
.put_prev_task = put_prev_task_rt,
.set_next_task = set_next_task_rt,
-#ifdef CONFIG_SMP
.balance = balance_rt,
.select_task_rq = select_task_rq_rt,
.set_cpus_allowed = set_cpus_allowed_common,
@@ -2643,7 +2584,6 @@ DEFINE_SCHED_CLASS(rt) = {
.task_woken = task_woken_rt,
.switched_from = switched_from_rt,
.find_lock_rq = find_lock_lowest_rq,
-#endif
.task_tick = task_tick_rt,
@@ -2887,7 +2827,7 @@ int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
return 1;
}
-#else /* !CONFIG_RT_GROUP_SCHED */
+#else /* !CONFIG_RT_GROUP_SCHED: */
#ifdef CONFIG_SYSCTL
static int sched_rt_global_constraints(void)
@@ -2895,7 +2835,7 @@ static int sched_rt_global_constraints(void)
return 0;
}
#endif /* CONFIG_SYSCTL */
-#endif /* CONFIG_RT_GROUP_SCHED */
+#endif /* !CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_SYSCTL
static int sched_rt_global_validate(void)
@@ -2951,6 +2891,12 @@ undo:
sched_domains_mutex_unlock();
mutex_unlock(&mutex);
+ /*
+ * After changing maximum available bandwidth for DEADLINE, we need to
+ * recompute per root domain and per cpus variables accordingly.
+ */
+ rebuild_sched_domains();
+
return ret;
}