summaryrefslogtreecommitdiff
path: root/kernel/sched/topology.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/topology.c')
-rw-r--r--kernel/sched/topology.c134
1 files changed, 68 insertions, 66 deletions
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 10d1391e7416..c49aea8c1025 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -285,7 +285,7 @@ void rebuild_sched_domains_energy(void)
}
#ifdef CONFIG_PROC_SYSCTL
-static int sched_energy_aware_handler(struct ctl_table *table, int write,
+static int sched_energy_aware_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret, state;
@@ -312,7 +312,7 @@ static int sched_energy_aware_handler(struct ctl_table *table, int write,
return ret;
}
-static struct ctl_table sched_energy_aware_sysctls[] = {
+static const struct ctl_table sched_energy_aware_sysctls[] = {
{
.procname = "sched_energy_aware",
.data = &sysctl_sched_energy_aware,
@@ -322,7 +322,6 @@ static struct ctl_table sched_energy_aware_sysctls[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- {}
};
static int __init sched_energy_aware_sysctl_init(void)
@@ -502,7 +501,7 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
cpumask_clear_cpu(rq->cpu, old_rd->span);
/*
- * If we dont want to free the old_rd yet then
+ * If we don't want to free the old_rd yet then
* set old_rd to NULL to skip the freeing later
* in this function:
*/
@@ -517,6 +516,14 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
set_rq_online(rq);
+ /*
+ * Because the rq is not a task, dl_add_task_root_domain() did not
+ * move the fair server bw to the rd if it already started.
+ * Add it now.
+ */
+ if (rq->fair_server.dl_server)
+ __dl_server_attach_root(&rq->fair_server, rq);
+
rq_unlock_irqrestore(rq, &rf);
if (old_rd)
@@ -657,13 +664,13 @@ static void destroy_sched_domains(struct sched_domain *sd)
}
/*
- * Keep a special pointer to the highest sched_domain that has
- * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
- * allows us to avoid some pointer chasing select_idle_sibling().
+ * Keep a special pointer to the highest sched_domain that has SD_SHARE_LLC set
+ * (Last Level Cache Domain) for this allows us to avoid some pointer chasing
+ * select_idle_sibling().
*
- * Also keep a unique ID per domain (we use the first CPU number in
- * the cpumask of the domain), this allows us to quickly tell if
- * two CPUs are in the same cache domain, see cpus_share_cache().
+ * Also keep a unique ID per domain (we use the first CPU number in the cpumask
+ * of the domain), this allows us to quickly tell if two CPUs are in the same
+ * cache domain, see cpus_share_cache().
*/
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc);
DEFINE_PER_CPU(int, sd_llc_size);
@@ -684,7 +691,7 @@ static void update_top_cache_domain(int cpu)
int id = cpu;
int size = 1;
- sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
+ sd = highest_flag_domain(cpu, SD_SHARE_LLC);
if (sd) {
id = cpumask_first(sched_domain_span(sd));
size = cpumask_weight(sched_domain_span(sd));
@@ -1177,7 +1184,7 @@ fail:
* uniquely identify each group (for a given domain):
*
* - The first is the balance_cpu (see should_we_balance() and the
- * load-balance blub in fair.c); for each group we only want 1 CPU to
+ * load-balance blurb in fair.c); for each group we only want 1 CPU to
* continue balancing at a higher domain.
*
* - The second is the sched_group_capacity; we want all identical groups
@@ -1330,23 +1337,12 @@ next:
}
/*
- * Asymmetric CPU capacity bits
- */
-struct asym_cap_data {
- struct list_head link;
- unsigned long capacity;
- unsigned long cpus[];
-};
-
-/*
* Set of available CPUs grouped by their corresponding capacities
* Each list entry contains a CPU mask reflecting CPUs that share the same
* capacity.
* The lifespan of data is unlimited.
*/
-static LIST_HEAD(asym_cap_list);
-
-#define cpu_capacity_span(asym_data) to_cpumask((asym_data)->cpus)
+LIST_HEAD(asym_cap_list);
/*
* Verify whether there is any CPU capacity asymmetry in a given sched domain.
@@ -1386,21 +1382,39 @@ asym_cpu_capacity_classify(const struct cpumask *sd_span,
}
+static void free_asym_cap_entry(struct rcu_head *head)
+{
+ struct asym_cap_data *entry = container_of(head, struct asym_cap_data, rcu);
+ kfree(entry);
+}
+
static inline void asym_cpu_capacity_update_data(int cpu)
{
unsigned long capacity = arch_scale_cpu_capacity(cpu);
- struct asym_cap_data *entry = NULL;
+ struct asym_cap_data *insert_entry = NULL;
+ struct asym_cap_data *entry;
+ /*
+ * Search if capacity already exits. If not, track which the entry
+ * where we should insert to keep the list ordered descending.
+ */
list_for_each_entry(entry, &asym_cap_list, link) {
if (capacity == entry->capacity)
goto done;
+ else if (!insert_entry && capacity > entry->capacity)
+ insert_entry = list_prev_entry(entry, link);
}
entry = kzalloc(sizeof(*entry) + cpumask_size(), GFP_KERNEL);
if (WARN_ONCE(!entry, "Failed to allocate memory for asymmetry data\n"))
return;
entry->capacity = capacity;
- list_add(&entry->link, &asym_cap_list);
+
+ /* If NULL then the new capacity is the smallest, add last. */
+ if (!insert_entry)
+ list_add_tail_rcu(&entry->link, &asym_cap_list);
+ else
+ list_add_rcu(&entry->link, &insert_entry->link);
done:
__cpumask_set_cpu(cpu, cpu_capacity_span(entry));
}
@@ -1423,8 +1437,8 @@ static void asym_cpu_capacity_scan(void)
list_for_each_entry_safe(entry, next, &asym_cap_list, link) {
if (cpumask_empty(cpu_capacity_span(entry))) {
- list_del(&entry->link);
- kfree(entry);
+ list_del_rcu(&entry->link);
+ call_rcu(&entry->rcu, free_asym_cap_entry);
}
}
@@ -1434,8 +1448,8 @@ static void asym_cpu_capacity_scan(void)
*/
if (list_is_singular(&asym_cap_list)) {
entry = list_first_entry(&asym_cap_list, typeof(*entry), link);
- list_del(&entry->link);
- kfree(entry);
+ list_del_rcu(&entry->link);
+ call_rcu(&entry->rcu, free_asym_cap_entry);
}
}
@@ -1468,7 +1482,7 @@ static void set_domain_attribute(struct sched_domain *sd,
} else
request = attr->relax_domain_level;
- if (sd->level > request) {
+ if (sd->level >= request) {
/* Turn off idle balance on this domain: */
sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
}
@@ -1551,11 +1565,12 @@ static struct cpumask ***sched_domains_numa_masks;
*
* These flags are purely descriptive of the topology and do not prescribe
* behaviour. Behaviour is artificial and mapped in the below sd_init()
- * function:
+ * function. For details, see include/linux/sched/sd_flags.h.
*
- * SD_SHARE_CPUCAPACITY - describes SMT topologies
- * SD_SHARE_PKG_RESOURCES - describes shared caches
- * SD_NUMA - describes NUMA topologies
+ * SD_SHARE_CPUCAPACITY
+ * SD_SHARE_LLC
+ * SD_CLUSTER
+ * SD_NUMA
*
* Odd one out, which beside describing the topology has a quirk also
* prescribes the desired behaviour that goes along with it:
@@ -1565,7 +1580,7 @@ static struct cpumask ***sched_domains_numa_masks;
#define TOPOLOGY_SD_FLAGS \
(SD_SHARE_CPUCAPACITY | \
SD_CLUSTER | \
- SD_SHARE_PKG_RESOURCES | \
+ SD_SHARE_LLC | \
SD_NUMA | \
SD_ASYM_PACKING)
@@ -1608,7 +1623,7 @@ sd_init(struct sched_domain_topology_level *tl,
| 0*SD_BALANCE_WAKE
| 1*SD_WAKE_AFFINE
| 0*SD_SHARE_CPUCAPACITY
- | 0*SD_SHARE_PKG_RESOURCES
+ | 0*SD_SHARE_LLC
| 0*SD_SERIALIZE
| 1*SD_PREFER_SIBLING
| 0*SD_NUMA
@@ -1620,9 +1635,7 @@ sd_init(struct sched_domain_topology_level *tl,
.max_newidle_lb_cost = 0,
.last_decay_max_lb_cost = jiffies,
.child = child,
-#ifdef CONFIG_SCHED_DEBUG
.name = tl->name,
-#endif
};
sd_span = sched_domain_span(sd);
@@ -1645,7 +1658,7 @@ sd_init(struct sched_domain_topology_level *tl,
if (sd->flags & SD_SHARE_CPUCAPACITY) {
sd->imbalance_pct = 110;
- } else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+ } else if (sd->flags & SD_SHARE_LLC) {
sd->imbalance_pct = 117;
sd->cache_nice_tries = 1;
@@ -1670,7 +1683,7 @@ sd_init(struct sched_domain_topology_level *tl,
* For all levels sharing cache; connect a sched_domain_shared
* instance.
*/
- if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+ if (sd->flags & SD_SHARE_LLC) {
sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
atomic_inc(&sd->shared->ref);
atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
@@ -1846,7 +1859,7 @@ void sched_init_numa(int offline_node)
struct cpumask ***masks;
/*
- * O(nr_nodes^2) deduplicating selection sort -- in order to find the
+ * O(nr_nodes^2) de-duplicating selection sort -- in order to find the
* unique distances in the node_distance() table.
*/
distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL);
@@ -2323,10 +2336,8 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
if (!cpumask_subset(sched_domain_span(child),
sched_domain_span(sd))) {
pr_err("BUG: arch topology borken\n");
-#ifdef CONFIG_SCHED_DEBUG
pr_err(" the %s domain not a subset of the %s domain\n",
child->name, sd->name);
-#endif
/* Fixup, ensure @sd has at least @child CPUs. */
cpumask_or(sched_domain_span(sd),
sched_domain_span(sd),
@@ -2346,7 +2357,7 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
static bool topology_span_sane(struct sched_domain_topology_level *tl,
const struct cpumask *cpu_map, int cpu)
{
- int i;
+ int i = cpu + 1;
/* NUMA levels are allowed to overlap */
if (tl->flags & SDTL_OVERLAP)
@@ -2358,9 +2369,7 @@ static bool topology_span_sane(struct sched_domain_topology_level *tl,
* breaking the sched_group lists - i.e. a later get_group() pass
* breaks the linking done for an earlier span.
*/
- for_each_cpu(i, cpu_map) {
- if (i == cpu)
- continue;
+ for_each_cpu_from(i, cpu_map) {
/*
* We should 'and' all those masks with 'cpu_map' to exactly
* match the topology we're about to build, but that can only
@@ -2445,8 +2454,8 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
struct sched_domain *child = sd->child;
- if (!(sd->flags & SD_SHARE_PKG_RESOURCES) && child &&
- (child->flags & SD_SHARE_PKG_RESOURCES)) {
+ if (!(sd->flags & SD_SHARE_LLC) && child &&
+ (child->flags & SD_SHARE_LLC)) {
struct sched_domain __rcu *top_p;
unsigned int nr_llcs;
@@ -2506,16 +2515,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
/* Attach the domains */
rcu_read_lock();
for_each_cpu(i, cpu_map) {
- unsigned long capacity;
-
rq = cpu_rq(i);
sd = *per_cpu_ptr(d.sd, i);
- capacity = arch_scale_cpu_capacity(i);
- /* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
- if (capacity > READ_ONCE(d.rd->max_cpu_capacity))
- WRITE_ONCE(d.rd->max_cpu_capacity, capacity);
-
cpu_attach_domain(sd, d.rd, i);
if (lowest_flag_domain(i, SD_CLUSTER))
@@ -2529,10 +2531,8 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
if (has_cluster)
static_branch_inc_cpuslocked(&sched_cluster_active);
- if (rq && sched_debug_verbose) {
- pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
- cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
- }
+ if (rq && sched_debug_verbose)
+ pr_info("root domain span: %*pbl\n", cpumask_pr_args(cpu_map));
ret = 0;
error:
@@ -2717,9 +2717,11 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
/*
* This domain won't be destroyed and as such
- * its dl_bw->total_bw needs to be cleared. It
- * will be recomputed in function
- * update_tasks_root_domain().
+ * its dl_bw->total_bw needs to be cleared.
+ * Tasks contribution will be then recomputed
+ * in function dl_update_tasks_root_domain(),
+ * dl_servers contribution in function
+ * dl_restore_server_root_domain().
*/
rd = cpu_rq(cpumask_any(doms_cur[i]))->rd;
dl_clear_root_domain(rd);
@@ -2754,7 +2756,7 @@ match2:
}
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
- /* Build perf. domains: */
+ /* Build perf domains: */
for (i = 0; i < ndoms_new; i++) {
for (j = 0; j < n && !sched_energy_update; j++) {
if (cpumask_equal(doms_new[i], doms_cur[j]) &&
@@ -2763,7 +2765,7 @@ match2:
goto match3;
}
}
- /* No match - add perf. domains for a new rd */
+ /* No match - add perf domains for a new rd */
has_eas |= build_perf_domains(doms_new[i]);
match3:
;