diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-07-29 17:42:52 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-07-29 17:42:52 -0700 |
commit | bf76f23aa1c178e9115eba17f699fa726aed669b (patch) | |
tree | a88270238eae8a0e6d98225c7de4b06bc2d2fb37 /kernel/sched/topology.c | |
parent | 14bed9bc81bae64db98349319f367bfc7dab0afd (diff) | |
parent | 1b5f1454091e9e9fb5c944b3161acf4ec0894d0d (diff) |
Merge tag 'sched-core-2025-07-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"Core scheduler changes:
- Better tracking of maximum lag of tasks in presence of different
slices duration, for better handling of lag in the fair scheduler
(Vincent Guittot)
- Clean up and standardize #if/#else/#endif markers throughout the
entire scheduler code base (Ingo Molnar)
- Make SMP unconditional: build the SMP scheduler's data structures
and logic on UP kernel too, even though they are not used, to
simplify the scheduler and remove around 200 #ifdef/[#else]/#endif
blocks from the scheduler (Ingo Molnar)
- Reorganize cgroup bandwidth control interface handling for better
interfacing with sched_ext (Tejun Heo)
Balancing:
- Bump sd->max_newidle_lb_cost when newidle balance fails (Chris
Mason)
- Remove sched_domain_topology_level::flags to simplify the code
(Prateek Nayak)
- Simplify and clean up build_sched_topology() (Li Chen)
- Optimize build_sched_topology() on large machines (Li Chen)
Real-time scheduling:
- Add initial version of proxy execution: a mechanism for
mutex-owning tasks to inherit the scheduling context of higher
priority waiters.
Currently limited to a single runqueue and conditional on
CONFIG_EXPERT, and other limitations (John Stultz, Peter Zijlstra,
Valentin Schneider)
- Deadline scheduler (Juri Lelli):
- Fix dl_servers initialization order (Juri Lelli)
- Fix DL scheduler's root domain reinitialization logic (Juri
Lelli)
- Fix accounting bugs after global limits change (Juri Lelli)
- Fix scalability regression by implementing less agressive
dl_server handling (Peter Zijlstra)
PSI:
- Improve scalability by optimizing psi_group_change() cpu_clock()
usage (Peter Zijlstra)
Rust changes:
- Make Task, CondVar and PollCondVar methods inline to avoid
unnecessary function calls (Kunwu Chan, Panagiotis Foliadis)
- Add might_sleep() support for Rust code: Rust's "#[track_caller]"
mechanism is used so that Rust's might_sleep() doesn't need to be
defined as a macro (Fujita Tomonori)
- Introduce file_from_location() (Boqun Feng)
Debugging & instrumentation:
- Make clangd usable with scheduler source code files again (Peter
Zijlstra)
- tools: Add root_domains_dump.py which dumps root domains info (Juri
Lelli)
- tools: Add dl_bw_dump.py for printing bandwidth accounting info
(Juri Lelli)
Misc cleanups & fixes:
- Remove play_idle() (Feng Lee)
- Fix check_preemption_disabled() (Sebastian Andrzej Siewior)
- Do not call __put_task_struct() on RT if pi_blocked_on is set (Luis
Claudio R. Goncalves)
- Correct the comment in place_entity() (wang wei)"
* tag 'sched-core-2025-07-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (84 commits)
sched/idle: Remove play_idle()
sched: Do not call __put_task_struct() on rt if pi_blocked_on is set
sched: Start blocked_on chain processing in find_proxy_task()
sched: Fix proxy/current (push,pull)ability
sched: Add an initial sketch of the find_proxy_task() function
sched: Fix runtime accounting w/ split exec & sched contexts
sched: Move update_curr_task logic into update_curr_se
locking/mutex: Add p->blocked_on wrappers for correctness checks
locking/mutex: Rework task_struct::blocked_on
sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable
sched/topology: Remove sched_domain_topology_level::flags
x86/smpboot: avoid SMT domain attach/destroy if SMT is not enabled
x86/smpboot: moves x86_topology to static initialize and truncate
x86/smpboot: remove redundant CONFIG_SCHED_SMT
smpboot: introduce SDTL_INIT() helper to tidy sched topology setup
tools/sched: Add dl_bw_dump.py for printing bandwidth accounting info
tools/sched: Add root_domains_dump.py which dumps root domains info
sched/deadline: Fix accounting after global limits change
sched/deadline: Reset extra_bw to max_bw when clearing root domains
sched/deadline: Initialize dl_servers after SMP
...
Diffstat (limited to 'kernel/sched/topology.c')
-rw-r--r-- | kernel/sched/topology.c | 57 |
1 files changed, 24 insertions, 33 deletions
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index b958fe48e020..977e133bb8a4 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -3,7 +3,9 @@ * Scheduler topology setup/handling methods */ +#include <linux/sched/isolation.h> #include <linux/bsearch.h> +#include "sched.h" DEFINE_MUTEX(sched_domains_mutex); void sched_domains_mutex_lock(void) @@ -87,7 +89,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, break; } - if (!(sd->flags & SD_OVERLAP) && + if (!(sd->flags & SD_NUMA) && cpumask_intersects(groupmask, sched_group_span(group))) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: repeated CPUs\n"); @@ -100,7 +102,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, group->sgc->id, cpumask_pr_args(sched_group_span(group))); - if ((sd->flags & SD_OVERLAP) && + if ((sd->flags & SD_NUMA) && !cpumask_equal(group_balance_mask(group), sched_group_span(group))) { printk(KERN_CONT " mask=%*pbl", cpumask_pr_args(group_balance_mask(group))); @@ -313,7 +315,7 @@ static int __init sched_energy_aware_sysctl_init(void) } late_initcall(sched_energy_aware_sysctl_init); -#endif +#endif /* CONFIG_PROC_SYSCTL */ static void free_pd(struct perf_domain *pd) { @@ -449,9 +451,9 @@ free: return false; } -#else +#else /* !(CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL): */ static void free_pd(struct perf_domain *pd) { } -#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL*/ +#endif /* !(CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */ static void free_rootdomain(struct rcu_head *rcu) { @@ -1318,8 +1320,6 @@ next: update_group_capacity(sd, cpu); } -#ifdef CONFIG_SMP - /* Update the "asym_prefer_cpu" when arch_asym_cpu_priority() changes. */ void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio) { @@ -1344,7 +1344,7 @@ void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio) * "sg->asym_prefer_cpu" to "sg->sgc->asym_prefer_cpu" * which is shared by all the overlapping groups. */ - WARN_ON_ONCE(sd->flags & SD_OVERLAP); + WARN_ON_ONCE(sd->flags & SD_NUMA); sg = sd->groups; if (cpu != sg->asym_prefer_cpu) { @@ -1374,8 +1374,6 @@ void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio) } } -#endif /* CONFIG_SMP */ - /* * Set of available CPUs grouped by their corresponding capacities * Each list entry contains a CPU mask reflecting CPUs that share the same @@ -1598,7 +1596,7 @@ static int sched_domains_curr_level; int sched_max_numa_distance; static int *sched_domains_numa_distance; static struct cpumask ***sched_domains_numa_masks; -#endif +#endif /* CONFIG_NUMA */ /* * SD_flags allowed in topology descriptions. @@ -1714,7 +1712,7 @@ sd_init(struct sched_domain_topology_level *tl, SD_WAKE_AFFINE); } -#endif +#endif /* CONFIG_NUMA */ } else { sd->cache_nice_tries = 1; } @@ -1739,17 +1737,17 @@ sd_init(struct sched_domain_topology_level *tl, */ static struct sched_domain_topology_level default_topology[] = { #ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, + SDTL_INIT(cpu_smt_mask, cpu_smt_flags, SMT), #endif #ifdef CONFIG_SCHED_CLUSTER - { cpu_clustergroup_mask, cpu_cluster_flags, SD_INIT_NAME(CLS) }, + SDTL_INIT(cpu_clustergroup_mask, cpu_cluster_flags, CLS), #endif #ifdef CONFIG_SCHED_MC - { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, + SDTL_INIT(cpu_coregroup_mask, cpu_core_flags, MC), #endif - { cpu_cpu_mask, SD_INIT_NAME(PKG) }, + SDTL_INIT(cpu_cpu_mask, NULL, PKG), { NULL, }, }; @@ -2010,23 +2008,14 @@ void sched_init_numa(int offline_node) /* * Add the NUMA identity distance, aka single NODE. */ - tl[i++] = (struct sched_domain_topology_level){ - .mask = sd_numa_mask, - .numa_level = 0, - SD_INIT_NAME(NODE) - }; + tl[i++] = SDTL_INIT(sd_numa_mask, NULL, NODE); /* * .. and append 'j' levels of NUMA goodness. */ for (j = 1; j < nr_levels; i++, j++) { - tl[i] = (struct sched_domain_topology_level){ - .mask = sd_numa_mask, - .sd_flags = cpu_numa_flags, - .flags = SDTL_OVERLAP, - .numa_level = j, - SD_INIT_NAME(NUMA) - }; + tl[i] = SDTL_INIT(sd_numa_mask, cpu_numa_flags, NUMA); + tl[i].numa_level = j; } sched_domain_topology_saved = sched_domain_topology; @@ -2337,7 +2326,7 @@ static void __sdt_free(const struct cpumask *cpu_map) if (sdd->sd) { sd = *per_cpu_ptr(sdd->sd, j); - if (sd && (sd->flags & SD_OVERLAP)) + if (sd && (sd->flags & SD_NUMA)) free_sched_groups(sd->groups, 0); kfree(*per_cpu_ptr(sdd->sd, j)); } @@ -2403,9 +2392,13 @@ static bool topology_span_sane(const struct cpumask *cpu_map) id_seen = sched_domains_tmpmask2; for_each_sd_topology(tl) { + int tl_common_flags = 0; + + if (tl->sd_flags) + tl_common_flags = (*tl->sd_flags)(); /* NUMA levels are allowed to overlap */ - if (tl->flags & SDTL_OVERLAP) + if (tl_common_flags & SD_NUMA) continue; cpumask_clear(covered); @@ -2476,8 +2469,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att if (tl == sched_domain_topology) *per_cpu_ptr(d.sd, i) = sd; - if (tl->flags & SDTL_OVERLAP) - sd->flags |= SD_OVERLAP; if (cpumask_equal(cpu_map, sched_domain_span(sd))) break; } @@ -2490,7 +2481,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att for_each_cpu(i, cpu_map) { for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { sd->span_weight = cpumask_weight(sched_domain_span(sd)); - if (sd->flags & SD_OVERLAP) { + if (sd->flags & SD_NUMA) { if (build_overlap_sched_groups(sd, i)) goto error; } else { |