summaryrefslogtreecommitdiff
path: root/kernel/cpu.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cpu.c')
-rw-r--r--kernel/cpu.c402
1 files changed, 383 insertions, 19 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f4a2c5845bcb..88a7ede322bd 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -17,6 +17,7 @@
#include <linux/cpu.h>
#include <linux/oom.h>
#include <linux/rcupdate.h>
+#include <linux/delay.h>
#include <linux/export.h>
#include <linux/bug.h>
#include <linux/kthread.h>
@@ -59,6 +60,7 @@
* @last: For multi-instance rollback, remember how far we got
* @cb_state: The state for a single callback (install/uninstall)
* @result: Result of the operation
+ * @ap_sync_state: State for AP synchronization
* @done_up: Signal completion to the issuer of the task for cpu-up
* @done_down: Signal completion to the issuer of the task for cpu-down
*/
@@ -76,6 +78,7 @@ struct cpuhp_cpu_state {
struct hlist_node *last;
enum cpuhp_state cb_state;
int result;
+ atomic_t ap_sync_state;
struct completion done_up;
struct completion done_down;
#endif
@@ -276,6 +279,182 @@ static bool cpuhp_is_atomic_state(enum cpuhp_state state)
return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
}
+/* Synchronization state management */
+enum cpuhp_sync_state {
+ SYNC_STATE_DEAD,
+ SYNC_STATE_KICKED,
+ SYNC_STATE_SHOULD_DIE,
+ SYNC_STATE_ALIVE,
+ SYNC_STATE_SHOULD_ONLINE,
+ SYNC_STATE_ONLINE,
+};
+
+#ifdef CONFIG_HOTPLUG_CORE_SYNC
+/**
+ * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
+ * @state: The synchronization state to set
+ *
+ * No synchronization point. Just update of the synchronization state, but implies
+ * a full barrier so that the AP changes are visible before the control CPU proceeds.
+ */
+static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
+{
+ atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
+
+ (void)atomic_xchg(st, state);
+}
+
+void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); }
+
+static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state,
+ enum cpuhp_sync_state next_state)
+{
+ atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+ ktime_t now, end, start = ktime_get();
+ int sync;
+
+ end = start + 10ULL * NSEC_PER_SEC;
+
+ sync = atomic_read(st);
+ while (1) {
+ if (sync == state) {
+ if (!atomic_try_cmpxchg(st, &sync, next_state))
+ continue;
+ return true;
+ }
+
+ now = ktime_get();
+ if (now > end) {
+ /* Timeout. Leave the state unchanged */
+ return false;
+ } else if (now - start < NSEC_PER_MSEC) {
+ /* Poll for one millisecond */
+ arch_cpuhp_sync_state_poll();
+ } else {
+ usleep_range_state(USEC_PER_MSEC, 2 * USEC_PER_MSEC, TASK_UNINTERRUPTIBLE);
+ }
+ sync = atomic_read(st);
+ }
+ return true;
+}
+#else /* CONFIG_HOTPLUG_CORE_SYNC */
+static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { }
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC */
+
+#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
+/**
+ * cpuhp_ap_report_dead - Update synchronization state to DEAD
+ *
+ * No synchronization point. Just update of the synchronization state.
+ */
+void cpuhp_ap_report_dead(void)
+{
+ cpuhp_ap_update_sync_state(SYNC_STATE_DEAD);
+}
+
+void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { }
+
+/*
+ * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down
+ * because the AP cannot issue complete() at this stage.
+ */
+static void cpuhp_bp_sync_dead(unsigned int cpu)
+{
+ atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+ int sync = atomic_read(st);
+
+ do {
+ /* CPU can have reported dead already. Don't overwrite that! */
+ if (sync == SYNC_STATE_DEAD)
+ break;
+ } while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE));
+
+ if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) {
+ /* CPU reached dead state. Invoke the cleanup function */
+ arch_cpuhp_cleanup_dead_cpu(cpu);
+ return;
+ }
+
+ /* No further action possible. Emit message and give up. */
+ pr_err("CPU%u failed to report dead state\n", cpu);
+}
+#else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */
+static inline void cpuhp_bp_sync_dead(unsigned int cpu) { }
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */
+
+#ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL
+/**
+ * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive
+ *
+ * Updates the AP synchronization state to SYNC_STATE_ALIVE and waits
+ * for the BP to release it.
+ */
+void cpuhp_ap_sync_alive(void)
+{
+ atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
+
+ cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE);
+
+ /* Wait for the control CPU to release it. */
+ while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE)
+ cpu_relax();
+}
+
+static bool cpuhp_can_boot_ap(unsigned int cpu)
+{
+ atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+ int sync = atomic_read(st);
+
+again:
+ switch (sync) {
+ case SYNC_STATE_DEAD:
+ /* CPU is properly dead */
+ break;
+ case SYNC_STATE_KICKED:
+ /* CPU did not come up in previous attempt */
+ break;
+ case SYNC_STATE_ALIVE:
+ /* CPU is stuck cpuhp_ap_sync_alive(). */
+ break;
+ default:
+ /* CPU failed to report online or dead and is in limbo state. */
+ return false;
+ }
+
+ /* Prepare for booting */
+ if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED))
+ goto again;
+
+ return true;
+}
+
+void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { }
+
+/*
+ * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up
+ * because the AP cannot issue complete() so early in the bringup.
+ */
+static int cpuhp_bp_sync_alive(unsigned int cpu)
+{
+ int ret = 0;
+
+ if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL))
+ return 0;
+
+ if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) {
+ pr_err("CPU%u failed to report alive state\n", cpu);
+ ret = -EIO;
+ }
+
+ /* Let the architecture cleanup the kick alive mechanics. */
+ arch_cpuhp_cleanup_kick_cpu(cpu);
+ return ret;
+}
+#else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */
+static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; }
+static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; }
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */
+
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
bool cpuhp_tasks_frozen;
@@ -470,8 +649,23 @@ bool cpu_smt_possible(void)
cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
}
EXPORT_SYMBOL_GPL(cpu_smt_possible);
+
+static inline bool cpuhp_smt_aware(void)
+{
+ return topology_smt_supported();
+}
+
+static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
+{
+ return cpu_primary_thread_mask;
+}
#else
static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
+static inline bool cpuhp_smt_aware(void) { return false; }
+static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
+{
+ return cpu_present_mask;
+}
#endif
static inline enum cpuhp_state
@@ -558,7 +752,7 @@ static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
return ret;
}
-static int bringup_wait_for_ap(unsigned int cpu)
+static int bringup_wait_for_ap_online(unsigned int cpu)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@ -579,38 +773,94 @@ static int bringup_wait_for_ap(unsigned int cpu)
*/
if (!cpu_smt_allowed(cpu))
return -ECANCELED;
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
+static int cpuhp_kick_ap_alive(unsigned int cpu)
+{
+ if (!cpuhp_can_boot_ap(cpu))
+ return -EAGAIN;
+
+ return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu));
+}
+
+static int cpuhp_bringup_ap(unsigned int cpu)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int ret;
+
+ /*
+ * Some architectures have to walk the irq descriptors to
+ * setup the vector space for the cpu which comes online.
+ * Prevent irq alloc/free across the bringup.
+ */
+ irq_lock_sparse();
+
+ ret = cpuhp_bp_sync_alive(cpu);
+ if (ret)
+ goto out_unlock;
+
+ ret = bringup_wait_for_ap_online(cpu);
+ if (ret)
+ goto out_unlock;
+
+ irq_unlock_sparse();
if (st->target <= CPUHP_AP_ONLINE_IDLE)
return 0;
return cpuhp_kick_ap(cpu, st, st->target);
-}
+out_unlock:
+ irq_unlock_sparse();
+ return ret;
+}
+#else
static int bringup_cpu(unsigned int cpu)
{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
struct task_struct *idle = idle_thread_get(cpu);
int ret;
- /*
- * Reset stale stack state from the last time this CPU was online.
- */
- scs_task_reset(idle);
- kasan_unpoison_task_stack(idle);
+ if (!cpuhp_can_boot_ap(cpu))
+ return -EAGAIN;
/*
* Some architectures have to walk the irq descriptors to
* setup the vector space for the cpu which comes online.
- * Prevent irq alloc/free across the bringup.
+ *
+ * Prevent irq alloc/free across the bringup by acquiring the
+ * sparse irq lock. Hold it until the upcoming CPU completes the
+ * startup in cpuhp_online_idle() which allows to avoid
+ * intermediate synchronization points in the architecture code.
*/
irq_lock_sparse();
- /* Arch-specific enabling code. */
ret = __cpu_up(cpu, idle);
- irq_unlock_sparse();
if (ret)
- return ret;
- return bringup_wait_for_ap(cpu);
+ goto out_unlock;
+
+ ret = cpuhp_bp_sync_alive(cpu);
+ if (ret)
+ goto out_unlock;
+
+ ret = bringup_wait_for_ap_online(cpu);
+ if (ret)
+ goto out_unlock;
+
+ irq_unlock_sparse();
+
+ if (st->target <= CPUHP_AP_ONLINE_IDLE)
+ return 0;
+
+ return cpuhp_kick_ap(cpu, st, st->target);
+
+out_unlock:
+ irq_unlock_sparse();
+ return ret;
}
+#endif
static int finish_cpu(unsigned int cpu)
{
@@ -1099,6 +1349,8 @@ static int takedown_cpu(unsigned int cpu)
/* This actually kills the CPU. */
__cpu_die(cpu);
+ cpuhp_bp_sync_dead(cpu);
+
tick_cleanup_dead_cpu(cpu);
rcutree_migrate_callbacks(cpu);
return 0;
@@ -1345,8 +1597,10 @@ void cpuhp_online_idle(enum cpuhp_state state)
if (state != CPUHP_AP_ONLINE_IDLE)
return;
+ cpuhp_ap_update_sync_state(SYNC_STATE_ONLINE);
+
/*
- * Unpart the stopper thread before we start the idle loop (and start
+ * Unpark the stopper thread before we start the idle loop (and start
* scheduling); this ensures the stopper task is always available.
*/
stop_machine_unpark(smp_processor_id());
@@ -1383,6 +1637,12 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
ret = PTR_ERR(idle);
goto out;
}
+
+ /*
+ * Reset stale stack state from the last time this CPU was online.
+ */
+ scs_task_reset(idle);
+ kasan_unpoison_task_stack(idle);
}
cpuhp_tasks_frozen = tasks_frozen;
@@ -1502,18 +1762,96 @@ int bringup_hibernate_cpu(unsigned int sleep_cpu)
return 0;
}
-void bringup_nonboot_cpus(unsigned int setup_max_cpus)
+static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int ncpus,
+ enum cpuhp_state target)
{
unsigned int cpu;
- for_each_present_cpu(cpu) {
- if (num_online_cpus() >= setup_max_cpus)
+ for_each_cpu(cpu, mask) {
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+ if (cpu_up(cpu, target) && can_rollback_cpu(st)) {
+ /*
+ * If this failed then cpu_up() might have only
+ * rolled back to CPUHP_BP_KICK_AP for the final
+ * online. Clean it up. NOOP if already rolled back.
+ */
+ WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE));
+ }
+
+ if (!--ncpus)
break;
- if (!cpu_online(cpu))
- cpu_up(cpu, CPUHP_ONLINE);
}
}
+#ifdef CONFIG_HOTPLUG_PARALLEL
+static bool __cpuhp_parallel_bringup __ro_after_init = true;
+
+static int __init parallel_bringup_parse_param(char *arg)
+{
+ return kstrtobool(arg, &__cpuhp_parallel_bringup);
+}
+early_param("cpuhp.parallel", parallel_bringup_parse_param);
+
+/*
+ * On architectures which have enabled parallel bringup this invokes all BP
+ * prepare states for each of the to be onlined APs first. The last state
+ * sends the startup IPI to the APs. The APs proceed through the low level
+ * bringup code in parallel and then wait for the control CPU to release
+ * them one by one for the final onlining procedure.
+ *
+ * This avoids waiting for each AP to respond to the startup IPI in
+ * CPUHP_BRINGUP_CPU.
+ */
+static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
+{
+ const struct cpumask *mask = cpu_present_mask;
+
+ if (__cpuhp_parallel_bringup)
+ __cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup();
+ if (!__cpuhp_parallel_bringup)
+ return false;
+
+ if (cpuhp_smt_aware()) {
+ const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
+ static struct cpumask tmp_mask __initdata;
+
+ /*
+ * X86 requires to prevent that SMT siblings stopped while
+ * the primary thread does a microcode update for various
+ * reasons. Bring the primary threads up first.
+ */
+ cpumask_and(&tmp_mask, mask, pmask);
+ cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_BP_KICK_AP);
+ cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_ONLINE);
+ /* Account for the online CPUs */
+ ncpus -= num_online_cpus();
+ if (!ncpus)
+ return true;
+ /* Create the mask for secondary CPUs */
+ cpumask_andnot(&tmp_mask, mask, pmask);
+ mask = &tmp_mask;
+ }
+
+ /* Bring the not-yet started CPUs up */
+ cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP);
+ cpuhp_bringup_mask(mask, ncpus, CPUHP_ONLINE);
+ return true;
+}
+#else
+static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; }
+#endif /* CONFIG_HOTPLUG_PARALLEL */
+
+void __init bringup_nonboot_cpus(unsigned int setup_max_cpus)
+{
+ /* Try parallel bringup optimization if enabled */
+ if (cpuhp_bringup_cpus_parallel(setup_max_cpus))
+ return;
+
+ /* Full per CPU serialized bringup */
+ cpuhp_bringup_mask(cpu_present_mask, setup_max_cpus, CPUHP_ONLINE);
+}
+
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;
@@ -1740,13 +2078,38 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = timers_prepare_cpu,
.teardown.single = timers_dead_cpu,
},
- /* Kicks the plugged cpu into life */
+
+#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
+ /*
+ * Kicks the AP alive. AP will wait in cpuhp_ap_sync_alive() until
+ * the next step will release it.
+ */
+ [CPUHP_BP_KICK_AP] = {
+ .name = "cpu:kick_ap",
+ .startup.single = cpuhp_kick_ap_alive,
+ },
+
+ /*
+ * Waits for the AP to reach cpuhp_ap_sync_alive() and then
+ * releases it for the complete bringup.
+ */
+ [CPUHP_BRINGUP_CPU] = {
+ .name = "cpu:bringup",
+ .startup.single = cpuhp_bringup_ap,
+ .teardown.single = finish_cpu,
+ .cant_stop = true,
+ },
+#else
+ /*
+ * All-in-one CPU bringup state which includes the kick alive.
+ */
[CPUHP_BRINGUP_CPU] = {
.name = "cpu:bringup",
.startup.single = bringup_cpu,
.teardown.single = finish_cpu,
.cant_stop = true,
},
+#endif
/* Final state before CPU kills itself */
[CPUHP_AP_IDLE_DEAD] = {
.name = "idle:dead",
@@ -2723,6 +3086,7 @@ void __init boot_cpu_hotplug_init(void)
{
#ifdef CONFIG_SMP
cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
+ atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), SYNC_STATE_ONLINE);
#endif
this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
this_cpu_write(cpuhp_state.target, CPUHP_ONLINE);