summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel/smp.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel/smp.c')
-rw-r--r--arch/powerpc/kernel/smp.c231
1 files changed, 142 insertions, 89 deletions
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 0da6e59161cd..292fee8809bc 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -47,6 +47,7 @@
#include <asm/smp.h>
#include <asm/time.h>
#include <asm/machdep.h>
+#include <asm/mmu_context.h>
#include <asm/cputhreads.h>
#include <asm/cputable.h>
#include <asm/mpic.h>
@@ -60,6 +61,9 @@
#include <asm/ftrace.h>
#include <asm/kup.h>
#include <asm/fadump.h>
+#include <asm/systemcfg.h>
+
+#include <trace/events/ipi.h>
#ifdef DEBUG
#include <asm/udbg.h>
@@ -74,10 +78,10 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif
struct task_struct *secondary_current;
-bool has_big_cores;
-bool coregroup_enabled;
-bool thread_group_shares_l2;
-bool thread_group_shares_l3;
+bool has_big_cores __ro_after_init;
+bool coregroup_enabled __ro_after_init;
+bool thread_group_shares_l2 __ro_after_init;
+bool thread_group_shares_l3 __ro_after_init;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@@ -90,15 +94,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
EXPORT_SYMBOL_GPL(has_big_cores);
-enum {
-#ifdef CONFIG_SCHED_SMT
- smt_idx,
-#endif
- cache_idx,
- mc_idx,
- die_idx,
-};
-
#define MAX_THREAD_LIST_SIZE 8
#define THREAD_GROUP_SHARE_L1 1
#define THREAD_GROUP_SHARE_L2_L3 2
@@ -289,7 +284,7 @@ void smp_muxed_ipi_set_message(int cpu, int msg)
* Order previous accesses before accesses in the IPI handler.
*/
smp_mb();
- message[msg] = 1;
+ WRITE_ONCE(message[msg], 1);
}
void smp_muxed_ipi_message_pass(int cpu, int msg)
@@ -348,7 +343,7 @@ irqreturn_t smp_ipi_demux_relaxed(void)
if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
nmi_ipi_action(0, NULL);
#endif
- } while (info->messages);
+ } while (READ_ONCE(info->messages));
return IRQ_HANDLED;
}
@@ -364,12 +359,12 @@ static inline void do_message_pass(int cpu, int msg)
#endif
}
-void smp_send_reschedule(int cpu)
+void arch_smp_send_reschedule(int cpu)
{
if (likely(smp_ops))
do_message_pass(cpu, PPC_MSG_RESCHEDULE);
}
-EXPORT_SYMBOL_GPL(smp_send_reschedule);
+EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
void arch_send_call_function_single_ipi(int cpu)
{
@@ -415,9 +410,9 @@ noinstr static void nmi_ipi_lock_start(unsigned long *flags)
{
raw_local_irq_save(*flags);
hard_irq_disable();
- while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
+ while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
raw_local_irq_restore(*flags);
- spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0);
+ spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0);
raw_local_irq_save(*flags);
hard_irq_disable();
}
@@ -425,15 +420,15 @@ noinstr static void nmi_ipi_lock_start(unsigned long *flags)
noinstr static void nmi_ipi_lock(void)
{
- while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
- spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0);
+ while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
+ spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0);
}
noinstr static void nmi_ipi_unlock(void)
{
smp_mb();
- WARN_ON(arch_atomic_read(&__nmi_ipi_lock) != 1);
- arch_atomic_set(&__nmi_ipi_lock, 0);
+ WARN_ON(raw_atomic_read(&__nmi_ipi_lock) != 1);
+ raw_atomic_set(&__nmi_ipi_lock, 0);
}
noinstr static void nmi_ipi_unlock_end(unsigned long *flags)
@@ -594,7 +589,7 @@ void smp_send_debugger_break(void)
}
#endif
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
int cpu;
@@ -637,7 +632,7 @@ void crash_smp_send_stop(void)
stopped = true;
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
if (kexec_crash_image) {
crash_kexec_prepare();
return;
@@ -984,13 +979,13 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
return 0;
}
-static bool shared_caches;
+static bool shared_caches __ro_after_init;
#ifdef CONFIG_SCHED_SMT
/* cpumask of CPUs with asymmetric SMT dependency */
static int powerpc_smt_flags(void)
{
- int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+ int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
@@ -1001,6 +996,13 @@ static int powerpc_smt_flags(void)
#endif
/*
+ * On shared processor LPARs scheduled on a big core (which has two or more
+ * independent thread groups per core), prefer lower numbered CPUs, so
+ * that workload consolidates to lesser number of cores.
+ */
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(splpar_asym_pack);
+
+/*
* P9 has a slightly odd architecture where pairs of cores share an L2 cache.
* This topology makes it *much* cheaper to migrate tasks between adjacent cores
* since the migrated task remains cache hot. We want to take advantage of this
@@ -1008,50 +1010,50 @@ static int powerpc_smt_flags(void)
*/
static int powerpc_shared_cache_flags(void)
{
- return SD_SHARE_PKG_RESOURCES;
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return SD_SHARE_LLC | SD_ASYM_PACKING;
+
+ return SD_SHARE_LLC;
+}
+
+static int powerpc_shared_proc_flags(void)
+{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return SD_ASYM_PACKING;
+
+ return 0;
}
/*
* We can't just pass cpu_l2_cache_mask() directly because
* returns a non-const pointer and the compiler barfs on that.
*/
-static const struct cpumask *shared_cache_mask(int cpu)
+static const struct cpumask *tl_cache_mask(struct sched_domain_topology_level *tl, int cpu)
{
return per_cpu(cpu_l2_cache_map, cpu);
}
#ifdef CONFIG_SCHED_SMT
-static const struct cpumask *smallcore_smt_mask(int cpu)
+static const struct cpumask *tl_smallcore_smt_mask(struct sched_domain_topology_level *tl, int cpu)
{
return cpu_smallcore_mask(cpu);
}
#endif
-static struct cpumask *cpu_coregroup_mask(int cpu)
+struct cpumask *cpu_coregroup_mask(int cpu)
{
return per_cpu(cpu_coregroup_map, cpu);
}
static bool has_coregroup_support(void)
{
- return coregroup_enabled;
-}
+ /* Coregroup identification not available on shared systems */
+ if (is_shared_processor())
+ return 0;
-static const struct cpumask *cpu_mc_mask(int cpu)
-{
- return cpu_coregroup_mask(cpu);
+ return coregroup_enabled;
}
-static struct sched_domain_topology_level powerpc_topology[] = {
-#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
-#endif
- { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
- { cpu_mc_mask, SD_INIT_NAME(MC) },
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
- { NULL, },
-};
-
static int __init init_big_cores(void)
{
int cpu;
@@ -1083,9 +1085,32 @@ static int __init init_big_cores(void)
return 0;
}
+/*
+ * die_mask and die_id are only available on systems which support
+ * multiple coregroups within a same package. On all other systems, die_mask
+ * would be same as package mask and die_id would be set to -1.
+ */
+const struct cpumask *cpu_die_mask(int cpu)
+{
+ if (has_coregroup_support())
+ return per_cpu(cpu_coregroup_map, cpu);
+ else
+ return cpu_node_mask(cpu);
+}
+EXPORT_SYMBOL_GPL(cpu_die_mask);
+
+int cpu_die_id(int cpu)
+{
+ if (has_coregroup_support())
+ return cpu_to_coregroup_id(cpu);
+ else
+ return -1;
+}
+EXPORT_SYMBOL_GPL(cpu_die_id);
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- unsigned int cpu;
+ unsigned int cpu, num_threads;
DBG("smp_prepare_cpus\n");
@@ -1152,9 +1177,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
if (smp_ops && smp_ops->probe)
smp_ops->probe();
+
+ // Initalise the generic SMT topology support
+ num_threads = 1;
+ if (smt_enabled_at_boot)
+ num_threads = smt_enabled_at_boot;
+ cpu_smt_set_num_threads(num_threads, threads_per_core);
}
-void smp_prepare_boot_cpu(void)
+void __init smp_prepare_boot_cpu(void)
{
BUG_ON(smp_processor_id() != boot_cpuid);
#ifdef CONFIG_PPC64
@@ -1174,8 +1205,8 @@ int generic_cpu_disable(void)
return -EBUSY;
set_cpu_online(cpu, false);
-#ifdef CONFIG_PPC64
- vdso_data->processorCount--;
+#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
+ systemcfg->processorCount--;
#endif
/* Update affinity of all IRQs previously aimed at this CPU */
irq_migrate_all_off_this_cpu();
@@ -1249,7 +1280,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
#ifdef CONFIG_PPC64
paca_ptrs[cpu]->__current = idle;
paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
- THREAD_SIZE - STACK_FRAME_OVERHEAD;
+ THREAD_SIZE - STACK_FRAME_MIN_SIZE;
#endif
task_thread_info(idle)->cpu = cpu;
secondary_current = current_set[cpu] = idle;
@@ -1435,7 +1466,7 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
return false;
}
- cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
+ cpumask_and(*mask, cpu_online_mask, cpu_node_mask(cpu));
/* Update l2-cache mask with all the CPUs that are part of submask */
or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
@@ -1525,7 +1556,7 @@ static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
return;
}
- cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
+ cpumask_and(*mask, cpu_online_mask, cpu_node_mask(cpu));
/* Update coregroup mask with all the CPUs that are part of submask */
or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask);
@@ -1555,7 +1586,7 @@ static void add_cpu_to_masks(int cpu)
/*
* This CPU will not be in the online mask yet so we need to manually
- * add it to it's own thread sibling mask.
+ * add it to its own thread sibling mask.
*/
map_cpu_to_node(cpu, cpu_to_node(cpu));
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
@@ -1586,9 +1617,9 @@ static void add_cpu_to_masks(int cpu)
/* Skip all CPUs already part of current CPU core mask */
cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu));
- /* If chip_id is -1; limit the cpu_core_mask to within DIE*/
+ /* If chip_id is -1; limit the cpu_core_mask to within PKG */
if (chip_id == -1)
- cpumask_and(mask, mask, cpu_cpu_mask(cpu));
+ cpumask_and(mask, mask, cpu_node_mask(cpu));
for_each_cpu(i, mask) {
if (chip_id == cpu_to_chip_id(i)) {
@@ -1603,6 +1634,7 @@ static void add_cpu_to_masks(int cpu)
}
/* Activate a secondary processor. */
+__no_stack_protector
void start_secondary(void *unused)
{
unsigned int cpu = raw_smp_processor_id();
@@ -1611,12 +1643,15 @@ void start_secondary(void *unused)
if (IS_ENABLED(CONFIG_PPC32))
setup_kup();
- mmgrab(&init_mm);
+ mmgrab_lazy_tlb(&init_mm);
current->active_mm = &init_mm;
+ VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
+ cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
+ inc_mm_active_cpus(&init_mm);
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
- rcu_cpu_starting(cpu);
+ rcutree_report_cpu_starting(cpu);
cpu_callin_map[cpu] = 1;
if (smp_ops->setup_cpu)
@@ -1626,10 +1661,12 @@ void start_secondary(void *unused)
secondary_cpu_time_init();
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
if (system_state == SYSTEM_RUNNING)
- vdso_data->processorCount++;
+ systemcfg->processorCount++;
+#endif
+#ifdef CONFIG_PPC64
vdso_getcpu_init();
#endif
set_numa_node(numa_cpu_lookup_table[cpu]);
@@ -1669,43 +1706,40 @@ void start_secondary(void *unused)
BUG();
}
-static void __init fixup_topology(void)
+static struct sched_domain_topology_level powerpc_topology[6];
+
+static void __init build_sched_topology(void)
{
- int i;
+ int i = 0;
+
+ if (is_shared_processor() && has_big_cores)
+ static_branch_enable(&splpar_asym_pack);
#ifdef CONFIG_SCHED_SMT
if (has_big_cores) {
pr_info("Big cores detected but using small core scheduling\n");
- powerpc_topology[smt_idx].mask = smallcore_smt_mask;
+ powerpc_topology[i++] =
+ SDTL_INIT(tl_smallcore_smt_mask, powerpc_smt_flags, SMT);
+ } else {
+ powerpc_topology[i++] = SDTL_INIT(tl_smt_mask, powerpc_smt_flags, SMT);
}
#endif
+ if (shared_caches) {
+ powerpc_topology[i++] =
+ SDTL_INIT(tl_cache_mask, powerpc_shared_cache_flags, CACHE);
+ }
- if (!has_coregroup_support())
- powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask;
-
- /*
- * Try to consolidate topology levels here instead of
- * allowing scheduler to degenerate.
- * - Dont consolidate if masks are different.
- * - Dont consolidate if sd_flags exists and are different.
- */
- for (i = 1; i <= die_idx; i++) {
- if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask)
- continue;
+ if (has_coregroup_support()) {
+ powerpc_topology[i++] =
+ SDTL_INIT(tl_mc_mask, powerpc_shared_proc_flags, MC);
+ }
- if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags &&
- powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags)
- continue;
+ powerpc_topology[i++] = SDTL_INIT(tl_pkg_mask, powerpc_shared_proc_flags, PKG);
- if (!powerpc_topology[i - 1].sd_flags)
- powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags;
+ /* There must be one trailing NULL entry left. */
+ BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1);
- powerpc_topology[i].mask = powerpc_topology[i + 1].mask;
- powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags;
-#ifdef CONFIG_SCHED_DEBUG
- powerpc_topology[i].name = powerpc_topology[i + 1].name;
-#endif
- }
+ set_sched_topology(powerpc_topology);
}
void __init smp_cpus_done(unsigned int max_cpus)
@@ -1720,9 +1754,20 @@ void __init smp_cpus_done(unsigned int max_cpus)
smp_ops->bringup_done();
dump_numa_cpu_topology();
+ build_sched_topology();
+}
- fixup_topology();
- set_sched_topology(powerpc_topology);
+/*
+ * For asym packing, by default lower numbered CPU has higher priority.
+ * On shared processors, pack to lower numbered core. However avoid moving
+ * between thread_groups within the same core.
+ */
+int arch_asym_cpu_priority(int cpu)
+{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return -cpu / threads_per_core;
+
+ return -cpu;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -1748,11 +1793,19 @@ int __cpu_disable(void)
void __cpu_die(unsigned int cpu)
{
+ /*
+ * This could perhaps be a generic call in idlea_task_dead(), but
+ * that requires testing from all archs, so first put it here to
+ */
+ VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(&init_mm)));
+ dec_mm_active_cpus(&init_mm);
+ cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+
if (smp_ops->cpu_die)
smp_ops->cpu_die(cpu);
}
-void arch_cpu_idle_dead(void)
+void __noreturn arch_cpu_idle_dead(void)
{
/*
* Disable on the down path. This will be re-enabled by