Diffstat (limited to 'kernel/sched/topology.c')
-rw-r--r-- | kernel/sched/topology.c | 199 |
1 files changed, 126 insertions, 73 deletions
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index f1ebc60d967f..977e133bb8a4 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -3,7 +3,9 @@
  * Scheduler topology setup/handling methods
  */
 
+#include <linux/sched/isolation.h>
 #include <linux/bsearch.h>
 
+#include "sched.h"
 DEFINE_MUTEX(sched_domains_mutex);
 void sched_domains_mutex_lock(void)
@@ -87,7 +89,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 			break;
 		}
 
-		if (!(sd->flags & SD_OVERLAP) &&
+		if (!(sd->flags & SD_NUMA) &&
 		    cpumask_intersects(groupmask, sched_group_span(group))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: repeated CPUs\n");
@@ -100,7 +102,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 				group->sgc->id,
 				cpumask_pr_args(sched_group_span(group)));
 
-		if ((sd->flags & SD_OVERLAP) &&
+		if ((sd->flags & SD_NUMA) &&
 		    !cpumask_equal(group_balance_mask(group), sched_group_span(group))) {
 			printk(KERN_CONT " mask=%*pbl",
 				cpumask_pr_args(group_balance_mask(group)));
@@ -212,8 +214,6 @@ static bool sched_energy_update;
 static bool sched_is_eas_possible(const struct cpumask *cpu_mask)
 {
 	bool any_asym_capacity = false;
-	struct cpufreq_policy *policy;
-	struct cpufreq_governor *gov;
 	int i;
 
 	/* EAS is enabled for asymmetric CPU capacity topologies. */
@@ -248,25 +248,12 @@ static bool sched_is_eas_possible(const struct cpumask *cpu_mask)
 		return false;
 	}
 
-	/* Do not attempt EAS if schedutil is not being used. */
-	for_each_cpu(i, cpu_mask) {
-		policy = cpufreq_cpu_get(i);
-		if (!policy) {
-			if (sched_debug()) {
-				pr_info("rd %*pbl: Checking EAS, cpufreq policy not set for CPU: %d",
-					cpumask_pr_args(cpu_mask), i);
-			}
-			return false;
-		}
-		gov = policy->governor;
-		cpufreq_cpu_put(policy);
-		if (gov != &schedutil_gov) {
-			if (sched_debug()) {
-				pr_info("rd %*pbl: Checking EAS, schedutil is mandatory\n",
-					cpumask_pr_args(cpu_mask));
-			}
-			return false;
+	if (!cpufreq_ready_for_eas(cpu_mask)) {
+		if (sched_debug()) {
+			pr_info("rd %*pbl: Checking EAS: cpufreq is not ready\n",
+				cpumask_pr_args(cpu_mask));
 		}
+		return false;
 	}
 
 	return true;
@@ -328,7 +315,7 @@ static int __init sched_energy_aware_sysctl_init(void)
 }
 
 late_initcall(sched_energy_aware_sysctl_init);
-#endif
+#endif /* CONFIG_PROC_SYSCTL */
 
 static void free_pd(struct perf_domain *pd)
 {
@@ -464,9 +451,9 @@ free:
 
 	return false;
 }
-#else
+#else /* !(CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL): */
 static void free_pd(struct perf_domain *pd) { }
-#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL*/
+#endif /* !(CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */
 
 static void free_rootdomain(struct rcu_head *rcu)
 {
@@ -1333,6 +1320,60 @@ next:
 	update_group_capacity(sd, cpu);
 }
 
+/* Update the "asym_prefer_cpu" when arch_asym_cpu_priority() changes. */
+void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio)
+{
+	int asym_prefer_cpu = cpu;
+	struct sched_domain *sd;
+
+	guard(rcu)();
+
+	for_each_domain(cpu, sd) {
+		struct sched_group *sg;
+		int group_cpu;
+
+		if (!(sd->flags & SD_ASYM_PACKING))
+			continue;
+
+		/*
+		 * Groups of overlapping domain are replicated per NUMA
+		 * node and will require updating "asym_prefer_cpu" on
+		 * each local copy.
+		 *
+		 * If you are hitting this warning, consider moving
+		 * "sg->asym_prefer_cpu" to "sg->sgc->asym_prefer_cpu"
+		 * which is shared by all the overlapping groups.
+		 */
+		WARN_ON_ONCE(sd->flags & SD_NUMA);
+
+		sg = sd->groups;
+		if (cpu != sg->asym_prefer_cpu) {
+			/*
+			 * Since the parent is a superset of the current group,
+			 * if the cpu is not the "asym_prefer_cpu" at the
+			 * current level, it cannot be the preferred CPU at a
+			 * higher levels either.
+			 */
+			if (!sched_asym_prefer(cpu, sg->asym_prefer_cpu))
+				return;
+
+			WRITE_ONCE(sg->asym_prefer_cpu, cpu);
+			continue;
+		}
+
+		/* Ranking has improved; CPU is still the preferred one. */
+		if (new_prio >= old_prio)
+			continue;
+
+		for_each_cpu(group_cpu, sched_group_span(sg)) {
+			if (sched_asym_prefer(group_cpu, asym_prefer_cpu))
+				asym_prefer_cpu = group_cpu;
+		}
+
+		WRITE_ONCE(sg->asym_prefer_cpu, asym_prefer_cpu);
+	}
+}
+
 /*
  * Set of available CPUs grouped by their corresponding capacities
  * Each list entry contains a CPU mask reflecting CPUs that share the same
@@ -1555,7 +1596,7 @@ static int sched_domains_curr_level;
 int				sched_max_numa_distance;
 static int			*sched_domains_numa_distance;
 static struct cpumask		***sched_domains_numa_masks;
-#endif
+#endif /* CONFIG_NUMA */
 
 /*
  * SD_flags allowed in topology descriptions.
@@ -1671,7 +1712,7 @@ sd_init(struct sched_domain_topology_level *tl,
 				       SD_WAKE_AFFINE);
 		}
 
-#endif
+#endif /* CONFIG_NUMA */
 	} else {
 		sd->cache_nice_tries = 1;
 	}
@@ -1696,17 +1737,17 @@ sd_init(struct sched_domain_topology_level *tl,
  */
 static struct sched_domain_topology_level default_topology[] = {
 #ifdef CONFIG_SCHED_SMT
-	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+	SDTL_INIT(cpu_smt_mask, cpu_smt_flags, SMT),
 #endif
 
 #ifdef CONFIG_SCHED_CLUSTER
-	{ cpu_clustergroup_mask, cpu_cluster_flags, SD_INIT_NAME(CLS) },
+	SDTL_INIT(cpu_clustergroup_mask, cpu_cluster_flags, CLS),
 #endif
 
 #ifdef CONFIG_SCHED_MC
-	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+	SDTL_INIT(cpu_coregroup_mask, cpu_core_flags, MC),
 #endif
-	{ cpu_cpu_mask, SD_INIT_NAME(PKG) },
+	SDTL_INIT(cpu_cpu_mask, NULL, PKG),
 	{ NULL, },
 };
 
@@ -1967,23 +2008,14 @@ void sched_init_numa(int offline_node)
 	/*
 	 * Add the NUMA identity distance, aka single NODE.
 	 */
-	tl[i++] = (struct sched_domain_topology_level){
-		.mask = sd_numa_mask,
-		.numa_level = 0,
-		SD_INIT_NAME(NODE)
-	};
+	tl[i++] = SDTL_INIT(sd_numa_mask, NULL, NODE);
 
 	/*
 	 * .. and append 'j' levels of NUMA goodness.
 	 */
 	for (j = 1; j < nr_levels; i++, j++) {
-		tl[i] = (struct sched_domain_topology_level){
-			.mask = sd_numa_mask,
-			.sd_flags = cpu_numa_flags,
-			.flags = SDTL_OVERLAP,
-			.numa_level = j,
-			SD_INIT_NAME(NUMA)
-		};
+		tl[i] = SDTL_INIT(sd_numa_mask, cpu_numa_flags, NUMA);
+		tl[i].numa_level = j;
 	}
 
 	sched_domain_topology_saved = sched_domain_topology;
@@ -2098,7 +2130,7 @@ int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
 	for (i = 0; i < sched_domains_numa_levels; i++) {
 		if (!masks[i][j])
 			break;
-		cpu = cpumask_any_and(cpus, masks[i][j]);
+		cpu = cpumask_any_and_distribute(cpus, masks[i][j]);
 		if (cpu < nr_cpu_ids) {
 			found = cpu;
 			break;
@@ -2294,7 +2326,7 @@ static void __sdt_free(const struct cpumask *cpu_map)
 
 		if (sdd->sd) {
 			sd = *per_cpu_ptr(sdd->sd, j);
-			if (sd && (sd->flags & SD_OVERLAP))
+			if (sd && (sd->flags & SD_NUMA))
 				free_sched_groups(sd->groups, 0);
 			kfree(*per_cpu_ptr(sdd->sd, j));
 		}
@@ -2347,35 +2379,58 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
 
 /*
  * Ensure topology masks are sane, i.e. there are no conflicts (overlaps) for
- * any two given CPUs at this (non-NUMA) topology level.
+ * any two given CPUs on non-NUMA topology levels.
  */
-static bool topology_span_sane(struct sched_domain_topology_level *tl,
-			      const struct cpumask *cpu_map, int cpu)
+static bool topology_span_sane(const struct cpumask *cpu_map)
 {
-	int i = cpu + 1;
+	struct sched_domain_topology_level *tl;
+	struct cpumask *covered, *id_seen;
+	int cpu;
 
-	/* NUMA levels are allowed to overlap */
-	if (tl->flags & SDTL_OVERLAP)
-		return true;
+	lockdep_assert_held(&sched_domains_mutex);
+	covered = sched_domains_tmpmask;
+	id_seen = sched_domains_tmpmask2;
+
+	for_each_sd_topology(tl) {
+		int tl_common_flags = 0;
+
+		if (tl->sd_flags)
+			tl_common_flags = (*tl->sd_flags)();
+
+		/* NUMA levels are allowed to overlap */
+		if (tl_common_flags & SD_NUMA)
+			continue;
+
+		cpumask_clear(covered);
+		cpumask_clear(id_seen);
 
-	/*
-	 * Non-NUMA levels cannot partially overlap - they must be either
-	 * completely equal or completely disjoint. Otherwise we can end up
-	 * breaking the sched_group lists - i.e. a later get_group() pass
-	 * breaks the linking done for an earlier span.
-	 */
-	for_each_cpu_from(i, cpu_map) {
 		/*
-		 * We should 'and' all those masks with 'cpu_map' to exactly
-		 * match the topology we're about to build, but that can only
-		 * remove CPUs, which only lessens our ability to detect
-		 * overlaps
+		 * Non-NUMA levels cannot partially overlap - they must be either
+		 * completely equal or completely disjoint. Otherwise we can end up
+		 * breaking the sched_group lists - i.e. a later get_group() pass
+		 * breaks the linking done for an earlier span.
 		 */
-		if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) &&
-		    cpumask_intersects(tl->mask(cpu), tl->mask(i)))
-			return false;
-	}
+		for_each_cpu(cpu, cpu_map) {
+			const struct cpumask *tl_cpu_mask = tl->mask(cpu);
+			int id;
 
+			/* lowest bit set in this mask is used as a unique id */
+			id = cpumask_first(tl_cpu_mask);
+
+			if (cpumask_test_cpu(id, id_seen)) {
+				/* First CPU has already been seen, ensure identical spans */
+				if (!cpumask_equal(tl->mask(id), tl_cpu_mask))
+					return false;
+			} else {
+				/* First CPU hasn't been seen before, ensure it's a completely new span */
+				if (cpumask_intersects(tl_cpu_mask, covered))
+					return false;
+
+				cpumask_or(covered, covered, tl_cpu_mask);
+				cpumask_set_cpu(id, id_seen);
+			}
+		}
+	}
 
 	return true;
 }
@@ -2408,27 +2463,25 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 
 		sd = NULL;
 		for_each_sd_topology(tl) {
-			if (WARN_ON(!topology_span_sane(tl, cpu_map, i)))
-				goto error;
-
 			sd = build_sched_domain(tl, cpu_map, attr, sd, i);
 
 			has_asym |= sd->flags & SD_ASYM_CPUCAPACITY;
 
 			if (tl == sched_domain_topology)
 				*per_cpu_ptr(d.sd, i) = sd;
-			if (tl->flags & SDTL_OVERLAP)
-				sd->flags |= SD_OVERLAP;
 			if (cpumask_equal(cpu_map, sched_domain_span(sd)))
 				break;
 		}
 	}
 
+	if (WARN_ON(!topology_span_sane(cpu_map)))
+		goto error;
+
 	/* Build the groups for the domains */
 	for_each_cpu(i, cpu_map) {
 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
 			sd->span_weight = cpumask_weight(sched_domain_span(sd));
-			if (sd->flags & SD_OVERLAP) {
+			if (sd->flags & SD_NUMA) {
 				if (build_overlap_sched_groups(sd, i))
 					goto error;
 			} else {
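For reference, the id-based overlap check that the new topology_span_sane() performs can be modelled outside the kernel. The sketch below is a simplified userspace illustration, not part of the patch: plain 64-bit masks stand in for struct cpumask, the helper level_span_sane() and the sample masks are made up for the example, and it assumes at most 64 CPUs with every CPU belonging to a non-empty span. It only demonstrates the rule enforced above for non-NUMA levels: spans must be either identical or completely disjoint, with the lowest CPU of each span acting as its unique id.

/* Userspace sketch of the "first CPU as span id" sanity check (not kernel code). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* One topology level: spans[cpu] is the set of CPUs sharing the level with @cpu. */
static bool level_span_sane(const uint64_t *spans, int nr_cpus)
{
	uint64_t covered = 0;	/* union of all spans accepted so far        */
	uint64_t id_seen = 0;	/* lowest CPU ("id") of each accepted span   */

	for (int cpu = 0; cpu < nr_cpus; cpu++) {
		uint64_t span = spans[cpu];
		int id = __builtin_ctzll(span);	/* lowest set bit is the span id */

		if (id_seen & (1ULL << id)) {
			/* id already seen: this span must be identical to the first one */
			if (spans[id] != span)
				return false;
		} else {
			/* new id: the span must not partially overlap anything covered yet */
			if (span & covered)
				return false;
			covered |= span;
			id_seen |= 1ULL << id;
		}
	}
	return true;
}

int main(void)
{
	/* 4 CPUs, two disjoint 2-CPU spans: sane */
	uint64_t ok[]  = { 0x3, 0x3, 0xc, 0xc };
	/* CPU2's span {1,2} partially overlaps CPU0/CPU1's span {0,1}: not sane */
	uint64_t bad[] = { 0x3, 0x3, 0x6, 0x8 };

	printf("ok:  %d\n", level_span_sane(ok, 4));	/* prints 1 */
	printf("bad: %d\n", level_span_sane(bad, 4));	/* prints 0 */
	return 0;
}

The kernel version does the same bookkeeping with sched_domains_tmpmask/sched_domains_tmpmask2 as the covered and id_seen masks, which is why it can run once per topology level over the whole cpu_map instead of comparing every CPU pair as the removed per-CPU variant did.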