diff options
Diffstat (limited to 'kernel/sched/topology.c')
-rw-r--r-- | kernel/sched/topology.c | 134 |
1 files changed, 68 insertions, 66 deletions
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 10d1391e7416..c49aea8c1025 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -285,7 +285,7 @@ void rebuild_sched_domains_energy(void) } #ifdef CONFIG_PROC_SYSCTL -static int sched_energy_aware_handler(struct ctl_table *table, int write, +static int sched_energy_aware_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret, state; @@ -312,7 +312,7 @@ static int sched_energy_aware_handler(struct ctl_table *table, int write, return ret; } -static struct ctl_table sched_energy_aware_sysctls[] = { +static const struct ctl_table sched_energy_aware_sysctls[] = { { .procname = "sched_energy_aware", .data = &sysctl_sched_energy_aware, @@ -322,7 +322,6 @@ static struct ctl_table sched_energy_aware_sysctls[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - {} }; static int __init sched_energy_aware_sysctl_init(void) @@ -502,7 +501,7 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) cpumask_clear_cpu(rq->cpu, old_rd->span); /* - * If we dont want to free the old_rd yet then + * If we don't want to free the old_rd yet then * set old_rd to NULL to skip the freeing later * in this function: */ @@ -517,6 +516,14 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) set_rq_online(rq); + /* + * Because the rq is not a task, dl_add_task_root_domain() did not + * move the fair server bw to the rd if it already started. + * Add it now. + */ + if (rq->fair_server.dl_server) + __dl_server_attach_root(&rq->fair_server, rq); + rq_unlock_irqrestore(rq, &rf); if (old_rd) @@ -657,13 +664,13 @@ static void destroy_sched_domains(struct sched_domain *sd) } /* - * Keep a special pointer to the highest sched_domain that has - * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this - * allows us to avoid some pointer chasing select_idle_sibling(). + * Keep a special pointer to the highest sched_domain that has SD_SHARE_LLC set + * (Last Level Cache Domain) for this allows us to avoid some pointer chasing + * select_idle_sibling(). * - * Also keep a unique ID per domain (we use the first CPU number in - * the cpumask of the domain), this allows us to quickly tell if - * two CPUs are in the same cache domain, see cpus_share_cache(). + * Also keep a unique ID per domain (we use the first CPU number in the cpumask + * of the domain), this allows us to quickly tell if two CPUs are in the same + * cache domain, see cpus_share_cache(). */ DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc); DEFINE_PER_CPU(int, sd_llc_size); @@ -684,7 +691,7 @@ static void update_top_cache_domain(int cpu) int id = cpu; int size = 1; - sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES); + sd = highest_flag_domain(cpu, SD_SHARE_LLC); if (sd) { id = cpumask_first(sched_domain_span(sd)); size = cpumask_weight(sched_domain_span(sd)); @@ -1177,7 +1184,7 @@ fail: * uniquely identify each group (for a given domain): * * - The first is the balance_cpu (see should_we_balance() and the - * load-balance blub in fair.c); for each group we only want 1 CPU to + * load-balance blurb in fair.c); for each group we only want 1 CPU to * continue balancing at a higher domain. * * - The second is the sched_group_capacity; we want all identical groups @@ -1330,23 +1337,12 @@ next: } /* - * Asymmetric CPU capacity bits - */ -struct asym_cap_data { - struct list_head link; - unsigned long capacity; - unsigned long cpus[]; -}; - -/* * Set of available CPUs grouped by their corresponding capacities * Each list entry contains a CPU mask reflecting CPUs that share the same * capacity. * The lifespan of data is unlimited. */ -static LIST_HEAD(asym_cap_list); - -#define cpu_capacity_span(asym_data) to_cpumask((asym_data)->cpus) +LIST_HEAD(asym_cap_list); /* * Verify whether there is any CPU capacity asymmetry in a given sched domain. @@ -1386,21 +1382,39 @@ asym_cpu_capacity_classify(const struct cpumask *sd_span, } +static void free_asym_cap_entry(struct rcu_head *head) +{ + struct asym_cap_data *entry = container_of(head, struct asym_cap_data, rcu); + kfree(entry); +} + static inline void asym_cpu_capacity_update_data(int cpu) { unsigned long capacity = arch_scale_cpu_capacity(cpu); - struct asym_cap_data *entry = NULL; + struct asym_cap_data *insert_entry = NULL; + struct asym_cap_data *entry; + /* + * Search if capacity already exits. If not, track which the entry + * where we should insert to keep the list ordered descending. + */ list_for_each_entry(entry, &asym_cap_list, link) { if (capacity == entry->capacity) goto done; + else if (!insert_entry && capacity > entry->capacity) + insert_entry = list_prev_entry(entry, link); } entry = kzalloc(sizeof(*entry) + cpumask_size(), GFP_KERNEL); if (WARN_ONCE(!entry, "Failed to allocate memory for asymmetry data\n")) return; entry->capacity = capacity; - list_add(&entry->link, &asym_cap_list); + + /* If NULL then the new capacity is the smallest, add last. */ + if (!insert_entry) + list_add_tail_rcu(&entry->link, &asym_cap_list); + else + list_add_rcu(&entry->link, &insert_entry->link); done: __cpumask_set_cpu(cpu, cpu_capacity_span(entry)); } @@ -1423,8 +1437,8 @@ static void asym_cpu_capacity_scan(void) list_for_each_entry_safe(entry, next, &asym_cap_list, link) { if (cpumask_empty(cpu_capacity_span(entry))) { - list_del(&entry->link); - kfree(entry); + list_del_rcu(&entry->link); + call_rcu(&entry->rcu, free_asym_cap_entry); } } @@ -1434,8 +1448,8 @@ static void asym_cpu_capacity_scan(void) */ if (list_is_singular(&asym_cap_list)) { entry = list_first_entry(&asym_cap_list, typeof(*entry), link); - list_del(&entry->link); - kfree(entry); + list_del_rcu(&entry->link); + call_rcu(&entry->rcu, free_asym_cap_entry); } } @@ -1468,7 +1482,7 @@ static void set_domain_attribute(struct sched_domain *sd, } else request = attr->relax_domain_level; - if (sd->level > request) { + if (sd->level >= request) { /* Turn off idle balance on this domain: */ sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); } @@ -1551,11 +1565,12 @@ static struct cpumask ***sched_domains_numa_masks; * * These flags are purely descriptive of the topology and do not prescribe * behaviour. Behaviour is artificial and mapped in the below sd_init() - * function: + * function. For details, see include/linux/sched/sd_flags.h. * - * SD_SHARE_CPUCAPACITY - describes SMT topologies - * SD_SHARE_PKG_RESOURCES - describes shared caches - * SD_NUMA - describes NUMA topologies + * SD_SHARE_CPUCAPACITY + * SD_SHARE_LLC + * SD_CLUSTER + * SD_NUMA * * Odd one out, which beside describing the topology has a quirk also * prescribes the desired behaviour that goes along with it: @@ -1565,7 +1580,7 @@ static struct cpumask ***sched_domains_numa_masks; #define TOPOLOGY_SD_FLAGS \ (SD_SHARE_CPUCAPACITY | \ SD_CLUSTER | \ - SD_SHARE_PKG_RESOURCES | \ + SD_SHARE_LLC | \ SD_NUMA | \ SD_ASYM_PACKING) @@ -1608,7 +1623,7 @@ sd_init(struct sched_domain_topology_level *tl, | 0*SD_BALANCE_WAKE | 1*SD_WAKE_AFFINE | 0*SD_SHARE_CPUCAPACITY - | 0*SD_SHARE_PKG_RESOURCES + | 0*SD_SHARE_LLC | 0*SD_SERIALIZE | 1*SD_PREFER_SIBLING | 0*SD_NUMA @@ -1620,9 +1635,7 @@ sd_init(struct sched_domain_topology_level *tl, .max_newidle_lb_cost = 0, .last_decay_max_lb_cost = jiffies, .child = child, -#ifdef CONFIG_SCHED_DEBUG .name = tl->name, -#endif }; sd_span = sched_domain_span(sd); @@ -1645,7 +1658,7 @@ sd_init(struct sched_domain_topology_level *tl, if (sd->flags & SD_SHARE_CPUCAPACITY) { sd->imbalance_pct = 110; - } else if (sd->flags & SD_SHARE_PKG_RESOURCES) { + } else if (sd->flags & SD_SHARE_LLC) { sd->imbalance_pct = 117; sd->cache_nice_tries = 1; @@ -1670,7 +1683,7 @@ sd_init(struct sched_domain_topology_level *tl, * For all levels sharing cache; connect a sched_domain_shared * instance. */ - if (sd->flags & SD_SHARE_PKG_RESOURCES) { + if (sd->flags & SD_SHARE_LLC) { sd->shared = *per_cpu_ptr(sdd->sds, sd_id); atomic_inc(&sd->shared->ref); atomic_set(&sd->shared->nr_busy_cpus, sd_weight); @@ -1846,7 +1859,7 @@ void sched_init_numa(int offline_node) struct cpumask ***masks; /* - * O(nr_nodes^2) deduplicating selection sort -- in order to find the + * O(nr_nodes^2) de-duplicating selection sort -- in order to find the * unique distances in the node_distance() table. */ distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL); @@ -2323,10 +2336,8 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve if (!cpumask_subset(sched_domain_span(child), sched_domain_span(sd))) { pr_err("BUG: arch topology borken\n"); -#ifdef CONFIG_SCHED_DEBUG pr_err(" the %s domain not a subset of the %s domain\n", child->name, sd->name); -#endif /* Fixup, ensure @sd has at least @child CPUs. */ cpumask_or(sched_domain_span(sd), sched_domain_span(sd), @@ -2346,7 +2357,7 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve static bool topology_span_sane(struct sched_domain_topology_level *tl, const struct cpumask *cpu_map, int cpu) { - int i; + int i = cpu + 1; /* NUMA levels are allowed to overlap */ if (tl->flags & SDTL_OVERLAP) @@ -2358,9 +2369,7 @@ static bool topology_span_sane(struct sched_domain_topology_level *tl, * breaking the sched_group lists - i.e. a later get_group() pass * breaks the linking done for an earlier span. */ - for_each_cpu(i, cpu_map) { - if (i == cpu) - continue; + for_each_cpu_from(i, cpu_map) { /* * We should 'and' all those masks with 'cpu_map' to exactly * match the topology we're about to build, but that can only @@ -2445,8 +2454,8 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { struct sched_domain *child = sd->child; - if (!(sd->flags & SD_SHARE_PKG_RESOURCES) && child && - (child->flags & SD_SHARE_PKG_RESOURCES)) { + if (!(sd->flags & SD_SHARE_LLC) && child && + (child->flags & SD_SHARE_LLC)) { struct sched_domain __rcu *top_p; unsigned int nr_llcs; @@ -2506,16 +2515,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att /* Attach the domains */ rcu_read_lock(); for_each_cpu(i, cpu_map) { - unsigned long capacity; - rq = cpu_rq(i); sd = *per_cpu_ptr(d.sd, i); - capacity = arch_scale_cpu_capacity(i); - /* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */ - if (capacity > READ_ONCE(d.rd->max_cpu_capacity)) - WRITE_ONCE(d.rd->max_cpu_capacity, capacity); - cpu_attach_domain(sd, d.rd, i); if (lowest_flag_domain(i, SD_CLUSTER)) @@ -2529,10 +2531,8 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att if (has_cluster) static_branch_inc_cpuslocked(&sched_cluster_active); - if (rq && sched_debug_verbose) { - pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n", - cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity); - } + if (rq && sched_debug_verbose) + pr_info("root domain span: %*pbl\n", cpumask_pr_args(cpu_map)); ret = 0; error: @@ -2717,9 +2717,11 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[], /* * This domain won't be destroyed and as such - * its dl_bw->total_bw needs to be cleared. It - * will be recomputed in function - * update_tasks_root_domain(). + * its dl_bw->total_bw needs to be cleared. + * Tasks contribution will be then recomputed + * in function dl_update_tasks_root_domain(), + * dl_servers contribution in function + * dl_restore_server_root_domain(). */ rd = cpu_rq(cpumask_any(doms_cur[i]))->rd; dl_clear_root_domain(rd); @@ -2754,7 +2756,7 @@ match2: } #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) - /* Build perf. domains: */ + /* Build perf domains: */ for (i = 0; i < ndoms_new; i++) { for (j = 0; j < n && !sched_energy_update; j++) { if (cpumask_equal(doms_new[i], doms_cur[j]) && @@ -2763,7 +2765,7 @@ match2: goto match3; } } - /* No match - add perf. domains for a new rd */ + /* No match - add perf domains for a new rd */ has_eas |= build_perf_domains(doms_new[i]); match3: ; |