summaryrefslogtreecommitdiff
path: root/kernel/cpu.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cpu.c')
-rw-r--r--kernel/cpu.c1130
1 files changed, 904 insertions, 226 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ff2578ecf17..07455d25329c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -17,6 +17,7 @@
#include <linux/cpu.h>
#include <linux/oom.h>
#include <linux/rcupdate.h>
+#include <linux/delay.h>
#include <linux/export.h>
#include <linux/bug.h>
#include <linux/kthread.h>
@@ -31,7 +32,11 @@
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>
+#include <linux/scs.h>
#include <linux/percpu-rwsem.h>
+#include <linux/cpuset.h>
+#include <linux/random.h>
+#include <linux/cc_platform.h>
#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
@@ -40,16 +45,21 @@
#include "smpboot.h"
/**
- * cpuhp_cpu_state - Per cpu hotplug state storage
+ * struct cpuhp_cpu_state - Per cpu hotplug state storage
* @state: The current cpu state
* @target: The target state
+ * @fail: Current CPU hotplug callback state
* @thread: Pointer to the hotplug thread
* @should_run: Thread should execute
* @rollback: Perform a rollback
* @single: Single callback invocation
* @bringup: Single callback bringup or teardown selector
+ * @node: Remote CPU node; for multi-instance, do a
+ * single entry callback for install/remove
+ * @last: For multi-instance rollback, remember how far we got
* @cb_state: The state for a single callback (install/uninstall)
* @result: Result of the operation
+ * @ap_sync_state: State for AP synchronization
* @done_up: Signal completion to the issuer of the task for cpu-up
* @done_down: Signal completion to the issuer of the task for cpu-down
*/
@@ -67,6 +77,7 @@ struct cpuhp_cpu_state {
struct hlist_node *last;
enum cpuhp_state cb_state;
int result;
+ atomic_t ap_sync_state;
struct completion done_up;
struct completion done_down;
#endif
@@ -104,11 +115,12 @@ static inline void cpuhp_lock_release(bool bringup) { }
#endif
/**
- * cpuhp_step - Hotplug state machine step
+ * struct cpuhp_step - Hotplug state machine step
* @name: Name of the step
* @startup: Startup function of the step
* @teardown: Teardown function of the step
* @cant_stop: Bringup/teardown can't be stopped at this step
+ * @multi_instance: State has multiple instances which get added afterwards
*/
struct cpuhp_step {
const char *name;
@@ -122,7 +134,9 @@ struct cpuhp_step {
int (*multi)(unsigned int cpu,
struct hlist_node *node);
} teardown;
+ /* private: */
struct hlist_head list;
+ /* public: */
bool cant_stop;
bool multi_instance;
};
@@ -135,8 +149,13 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
return cpuhp_hp_states + state;
}
+static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step)
+{
+ return bringup ? !step->startup.single : !step->teardown.single;
+}
+
/**
- * cpuhp_invoke_callback _ Invoke the callbacks for a given state
+ * cpuhp_invoke_callback - Invoke the callbacks for a given state
* @cpu: The cpu for which the callback should be invoked
* @state: The state to do callbacks for
* @bringup: True if the bringup callback should be invoked
@@ -144,6 +163,8 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
* @lastp: For multi-instance rollback, remember how far we got
*
* Called from cpu hotplug and from the state register machinery.
+ *
+ * Return: %0 on success or a negative errno code
*/
static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
bool bringup, struct hlist_node *node,
@@ -157,26 +178,24 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
if (st->fail == state) {
st->fail = CPUHP_INVALID;
-
- if (!(bringup ? step->startup.single : step->teardown.single))
- return 0;
-
return -EAGAIN;
}
+ if (cpuhp_step_empty(bringup, step)) {
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+
if (!step->multi_instance) {
WARN_ON_ONCE(lastp && *lastp);
cb = bringup ? step->startup.single : step->teardown.single;
- if (!cb)
- return 0;
+
trace_cpuhp_enter(cpu, st->target, state, cb);
ret = cb(cpu);
trace_cpuhp_exit(cpu, st->state, state, ret);
return ret;
}
cbm = bringup ? step->startup.multi : step->teardown.multi;
- if (!cbm)
- return 0;
/* Single invocation for instance add/remove */
if (node) {
@@ -259,6 +278,182 @@ static bool cpuhp_is_atomic_state(enum cpuhp_state state)
return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
}
+/* Synchronization state management */
+enum cpuhp_sync_state {
+ SYNC_STATE_DEAD,
+ SYNC_STATE_KICKED,
+ SYNC_STATE_SHOULD_DIE,
+ SYNC_STATE_ALIVE,
+ SYNC_STATE_SHOULD_ONLINE,
+ SYNC_STATE_ONLINE,
+};
+
+#ifdef CONFIG_HOTPLUG_CORE_SYNC
+/**
+ * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
+ * @state: The synchronization state to set
+ *
+ * No synchronization point. Just update of the synchronization state, but implies
+ * a full barrier so that the AP changes are visible before the control CPU proceeds.
+ */
+static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
+{
+ atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
+
+ (void)atomic_xchg(st, state);
+}
+
+void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); }
+
+static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state,
+ enum cpuhp_sync_state next_state)
+{
+ atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+ ktime_t now, end, start = ktime_get();
+ int sync;
+
+ end = start + 10ULL * NSEC_PER_SEC;
+
+ sync = atomic_read(st);
+ while (1) {
+ if (sync == state) {
+ if (!atomic_try_cmpxchg(st, &sync, next_state))
+ continue;
+ return true;
+ }
+
+ now = ktime_get();
+ if (now > end) {
+ /* Timeout. Leave the state unchanged */
+ return false;
+ } else if (now - start < NSEC_PER_MSEC) {
+ /* Poll for one millisecond */
+ arch_cpuhp_sync_state_poll();
+ } else {
+ usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC);
+ }
+ sync = atomic_read(st);
+ }
+ return true;
+}
+#else /* CONFIG_HOTPLUG_CORE_SYNC */
+static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { }
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC */
+
+#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
+/**
+ * cpuhp_ap_report_dead - Update synchronization state to DEAD
+ *
+ * No synchronization point. Just update of the synchronization state.
+ */
+void cpuhp_ap_report_dead(void)
+{
+ cpuhp_ap_update_sync_state(SYNC_STATE_DEAD);
+}
+
+void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { }
+
+/*
+ * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down
+ * because the AP cannot issue complete() at this stage.
+ */
+static void cpuhp_bp_sync_dead(unsigned int cpu)
+{
+ atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+ int sync = atomic_read(st);
+
+ do {
+ /* CPU can have reported dead already. Don't overwrite that! */
+ if (sync == SYNC_STATE_DEAD)
+ break;
+ } while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE));
+
+ if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) {
+ /* CPU reached dead state. Invoke the cleanup function */
+ arch_cpuhp_cleanup_dead_cpu(cpu);
+ return;
+ }
+
+ /* No further action possible. Emit message and give up. */
+ pr_err("CPU%u failed to report dead state\n", cpu);
+}
+#else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */
+static inline void cpuhp_bp_sync_dead(unsigned int cpu) { }
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */
+
+#ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL
+/**
+ * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive
+ *
+ * Updates the AP synchronization state to SYNC_STATE_ALIVE and waits
+ * for the BP to release it.
+ */
+void cpuhp_ap_sync_alive(void)
+{
+ atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
+
+ cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE);
+
+ /* Wait for the control CPU to release it. */
+ while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE)
+ cpu_relax();
+}
+
+static bool cpuhp_can_boot_ap(unsigned int cpu)
+{
+ atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+ int sync = atomic_read(st);
+
+again:
+ switch (sync) {
+ case SYNC_STATE_DEAD:
+ /* CPU is properly dead */
+ break;
+ case SYNC_STATE_KICKED:
+ /* CPU did not come up in previous attempt */
+ break;
+ case SYNC_STATE_ALIVE:
+ /* CPU is stuck cpuhp_ap_sync_alive(). */
+ break;
+ default:
+ /* CPU failed to report online or dead and is in limbo state. */
+ return false;
+ }
+
+ /* Prepare for booting */
+ if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED))
+ goto again;
+
+ return true;
+}
+
+void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { }
+
+/*
+ * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up
+ * because the AP cannot issue complete() so early in the bringup.
+ */
+static int cpuhp_bp_sync_alive(unsigned int cpu)
+{
+ int ret = 0;
+
+ if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL))
+ return 0;
+
+ if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) {
+ pr_err("CPU%u failed to report alive state\n", cpu);
+ ret = -EIO;
+ }
+
+ /* Let the architecture cleanup the kick alive mechanics. */
+ arch_cpuhp_cleanup_kick_cpu(cpu);
+ return ret;
+}
+#else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */
+static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; }
+static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; }
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */
+
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
bool cpuhp_tasks_frozen;
@@ -288,6 +483,8 @@ static int cpu_hotplug_disabled;
DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
+static bool cpu_hotplug_offline_disabled __ro_after_init;
+
void cpus_read_lock(void)
{
percpu_down_read(&cpu_hotplug_lock);
@@ -330,6 +527,13 @@ void lockdep_assert_cpus_held(void)
percpu_rwsem_assert_held(&cpu_hotplug_lock);
}
+#ifdef CONFIG_LOCKDEP
+int lockdep_is_cpus_held(void)
+{
+ return percpu_rwsem_is_held(&cpu_hotplug_lock);
+}
+#endif
+
static void lockdep_acquire_cpus_lock(void)
{
rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
@@ -340,6 +544,14 @@ static void lockdep_release_cpus_lock(void)
rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
}
+/* Declare CPU offlining not supported */
+void cpu_hotplug_disable_offlining(void)
+{
+ cpu_maps_update_begin();
+ cpu_hotplug_offline_disabled = true;
+ cpu_maps_update_done();
+}
+
/*
* Wait for currently running CPU hotplug operations to complete (if any) and
* disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
@@ -389,7 +601,10 @@ static void lockdep_release_cpus_lock(void)
void __weak arch_smt_update(void) { }
#ifdef CONFIG_HOTPLUG_SMT
+
enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
+static unsigned int cpu_smt_max_threads __ro_after_init;
+unsigned int cpu_smt_num_threads __read_mostly = UINT_MAX;
void __init cpu_smt_disable(bool force)
{
@@ -403,16 +618,33 @@ void __init cpu_smt_disable(bool force)
pr_info("SMT: disabled\n");
cpu_smt_control = CPU_SMT_DISABLED;
}
+ cpu_smt_num_threads = 1;
}
/*
* The decision whether SMT is supported can only be done after the full
* CPU identification. Called from architecture code.
*/
-void __init cpu_smt_check_topology(void)
+void __init cpu_smt_set_num_threads(unsigned int num_threads,
+ unsigned int max_threads)
{
- if (!topology_smt_supported())
+ WARN_ON(!num_threads || (num_threads > max_threads));
+
+ if (max_threads == 1)
cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+
+ cpu_smt_max_threads = max_threads;
+
+ /*
+ * If SMT has been disabled via the kernel command line or SMT is
+ * not supported, set cpu_smt_num_threads to 1 for consistency.
+ * If enabled, take the architecture requested number of threads
+ * to bring up into account.
+ */
+ if (cpu_smt_control != CPU_SMT_ENABLED)
+ cpu_smt_num_threads = 1;
+ else if (num_threads < cpu_smt_num_threads)
+ cpu_smt_num_threads = num_threads;
}
static int __init smt_cmdline_disable(char *str)
@@ -422,9 +654,31 @@ static int __init smt_cmdline_disable(char *str)
}
early_param("nosmt", smt_cmdline_disable);
-static inline bool cpu_smt_allowed(unsigned int cpu)
+/*
+ * For Archicture supporting partial SMT states check if the thread is allowed.
+ * Otherwise this has already been checked through cpu_smt_max_threads when
+ * setting the SMT level.
+ */
+static inline bool cpu_smt_thread_allowed(unsigned int cpu)
{
- if (cpu_smt_control == CPU_SMT_ENABLED)
+#ifdef CONFIG_SMT_NUM_THREADS_DYNAMIC
+ return topology_smt_thread_allowed(cpu);
+#else
+ return true;
+#endif
+}
+
+static inline bool cpu_bootable(unsigned int cpu)
+{
+ if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
+ return true;
+
+ /* All CPUs are bootable if controls are not configured */
+ if (cpu_smt_control == CPU_SMT_NOT_IMPLEMENTED)
+ return true;
+
+ /* All CPUs are bootable if CPU is not SMT capable */
+ if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
return true;
if (topology_is_primary_thread(cpu))
@@ -439,35 +693,51 @@ static inline bool cpu_smt_allowed(unsigned int cpu)
return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
}
-/* Returns true if SMT is not supported of forcefully (irreversibly) disabled */
+/* Returns true if SMT is supported and not forcefully (irreversibly) disabled */
bool cpu_smt_possible(void)
{
return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
}
EXPORT_SYMBOL_GPL(cpu_smt_possible);
+
#else
-static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
+static inline bool cpu_bootable(unsigned int cpu) { return true; }
#endif
static inline enum cpuhp_state
-cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
{
enum cpuhp_state prev_state = st->state;
+ bool bringup = st->state < target;
st->rollback = false;
st->last = NULL;
st->target = target;
st->single = false;
- st->bringup = st->state < target;
+ st->bringup = bringup;
+ if (cpu_dying(cpu) != !bringup)
+ set_cpu_dying(cpu, !bringup);
return prev_state;
}
static inline void
-cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
+cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
+ enum cpuhp_state prev_state)
{
+ bool bringup = !st->bringup;
+
+ st->target = prev_state;
+
+ /*
+ * Already rolling back. No need invert the bringup value or to change
+ * the current state.
+ */
+ if (st->rollback)
+ return;
+
st->rollback = true;
/*
@@ -481,8 +751,9 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
st->state++;
}
- st->target = prev_state;
- st->bringup = !st->bringup;
+ st->bringup = bringup;
+ if (cpu_dying(cpu) != !bringup)
+ set_cpu_dying(cpu, !bringup);
}
/* Regular hotplug invocation of the AP hotplug thread */
@@ -502,22 +773,23 @@ static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
wait_for_ap_thread(st, st->bringup);
}
-static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
+ enum cpuhp_state target)
{
enum cpuhp_state prev_state;
int ret;
- prev_state = cpuhp_set_state(st, target);
+ prev_state = cpuhp_set_state(cpu, st, target);
__cpuhp_kick_ap(st);
if ((ret = st->result)) {
- cpuhp_reset_state(st, prev_state);
+ cpuhp_reset_state(cpu, st, prev_state);
__cpuhp_kick_ap(st);
}
return ret;
}
-static int bringup_wait_for_ap(unsigned int cpu)
+static int bringup_wait_for_ap_online(unsigned int cpu)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@ -533,37 +805,99 @@ static int bringup_wait_for_ap(unsigned int cpu)
* SMT soft disabling on X86 requires to bring the CPU out of the
* BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
* CPU marked itself as booted_once in notify_cpu_starting() so the
- * cpu_smt_allowed() check will now return false if this is not the
+ * cpu_bootable() check will now return false if this is not the
* primary sibling.
*/
- if (!cpu_smt_allowed(cpu))
+ if (!cpu_bootable(cpu))
return -ECANCELED;
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
+static int cpuhp_kick_ap_alive(unsigned int cpu)
+{
+ if (!cpuhp_can_boot_ap(cpu))
+ return -EAGAIN;
+
+ return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu));
+}
+
+static int cpuhp_bringup_ap(unsigned int cpu)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int ret;
+
+ /*
+ * Some architectures have to walk the irq descriptors to
+ * setup the vector space for the cpu which comes online.
+ * Prevent irq alloc/free across the bringup.
+ */
+ irq_lock_sparse();
+
+ ret = cpuhp_bp_sync_alive(cpu);
+ if (ret)
+ goto out_unlock;
+
+ ret = bringup_wait_for_ap_online(cpu);
+ if (ret)
+ goto out_unlock;
+
+ irq_unlock_sparse();
if (st->target <= CPUHP_AP_ONLINE_IDLE)
return 0;
- return cpuhp_kick_ap(st, st->target);
-}
+ return cpuhp_kick_ap(cpu, st, st->target);
+out_unlock:
+ irq_unlock_sparse();
+ return ret;
+}
+#else
static int bringup_cpu(unsigned int cpu)
{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
struct task_struct *idle = idle_thread_get(cpu);
int ret;
+ if (!cpuhp_can_boot_ap(cpu))
+ return -EAGAIN;
+
/*
* Some architectures have to walk the irq descriptors to
* setup the vector space for the cpu which comes online.
- * Prevent irq alloc/free across the bringup.
+ *
+ * Prevent irq alloc/free across the bringup by acquiring the
+ * sparse irq lock. Hold it until the upcoming CPU completes the
+ * startup in cpuhp_online_idle() which allows to avoid
+ * intermediate synchronization points in the architecture code.
*/
irq_lock_sparse();
- /* Arch-specific enabling code. */
ret = __cpu_up(cpu, idle);
- irq_unlock_sparse();
if (ret)
- return ret;
- return bringup_wait_for_ap(cpu);
+ goto out_unlock;
+
+ ret = cpuhp_bp_sync_alive(cpu);
+ if (ret)
+ goto out_unlock;
+
+ ret = bringup_wait_for_ap_online(cpu);
+ if (ret)
+ goto out_unlock;
+
+ irq_unlock_sparse();
+
+ if (st->target <= CPUHP_AP_ONLINE_IDLE)
+ return 0;
+
+ return cpuhp_kick_ap(cpu, st, st->target);
+
+out_unlock:
+ irq_unlock_sparse();
+ return ret;
}
+#endif
static int finish_cpu(unsigned int cpu)
{
@@ -571,12 +905,13 @@ static int finish_cpu(unsigned int cpu)
struct mm_struct *mm = idle->active_mm;
/*
- * idle_task_exit() will have switched to &init_mm, now
- * clean up any remaining active_mm state.
+ * sched_force_init_mm() ensured the use of &init_mm,
+ * drop that refcount now that the CPU has stopped.
*/
- if (mm != &init_mm)
- idle->active_mm = &init_mm;
- mmdrop(mm);
+ WARN_ON(mm != &init_mm);
+ idle->active_mm = NULL;
+ mmdrop_lazy_tlb(mm);
+
return 0;
}
@@ -584,10 +919,83 @@ static int finish_cpu(unsigned int cpu)
* Hotplug state machine related functions
*/
-static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
+/*
+ * Get the next state to run. Empty ones will be skipped. Returns true if a
+ * state must be run.
+ *
+ * st->state will be modified ahead of time, to match state_to_run, as if it
+ * has already ran.
+ */
+static bool cpuhp_next_state(bool bringup,
+ enum cpuhp_state *state_to_run,
+ struct cpuhp_cpu_state *st,
+ enum cpuhp_state target)
+{
+ do {
+ if (bringup) {
+ if (st->state >= target)
+ return false;
+
+ *state_to_run = ++st->state;
+ } else {
+ if (st->state <= target)
+ return false;
+
+ *state_to_run = st->state--;
+ }
+
+ if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run)))
+ break;
+ } while (true);
+
+ return true;
+}
+
+static int __cpuhp_invoke_callback_range(bool bringup,
+ unsigned int cpu,
+ struct cpuhp_cpu_state *st,
+ enum cpuhp_state target,
+ bool nofail)
+{
+ enum cpuhp_state state;
+ int ret = 0;
+
+ while (cpuhp_next_state(bringup, &state, st, target)) {
+ int err;
+
+ err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
+ if (!err)
+ continue;
+
+ if (nofail) {
+ pr_warn("CPU %u %s state %s (%d) failed (%d)\n",
+ cpu, bringup ? "UP" : "DOWN",
+ cpuhp_get_step(st->state)->name,
+ st->state, err);
+ ret = -1;
+ } else {
+ ret = err;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static inline int cpuhp_invoke_callback_range(bool bringup,
+ unsigned int cpu,
+ struct cpuhp_cpu_state *st,
+ enum cpuhp_state target)
{
- for (st->state--; st->state > st->target; st->state--)
- cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
+ return __cpuhp_invoke_callback_range(bringup, cpu, st, target, false);
+}
+
+static inline void cpuhp_invoke_callback_range_nofail(bool bringup,
+ unsigned int cpu,
+ struct cpuhp_cpu_state *st,
+ enum cpuhp_state target)
+{
+ __cpuhp_invoke_callback_range(bringup, cpu, st, target, true);
}
static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
@@ -610,16 +1018,16 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state prev_state = st->state;
int ret = 0;
- while (st->state < target) {
- st->state++;
- ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
- if (ret) {
- if (can_rollback_cpu(st)) {
- st->target = prev_state;
- undo_cpu_up(cpu, st);
- }
- break;
- }
+ ret = cpuhp_invoke_callback_range(true, cpu, st, target);
+ if (ret) {
+ pr_debug("CPU UP failed (%d) CPU %u state %s (%d)\n",
+ ret, cpu, cpuhp_get_step(st->state)->name,
+ st->state);
+
+ cpuhp_reset_state(cpu, st, prev_state);
+ if (can_rollback_cpu(st))
+ WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
+ prev_state));
}
return ret;
}
@@ -627,14 +1035,6 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
/*
* The cpu hotplug threads manage the bringup and teardown of the cpus
*/
-static void cpuhp_create(unsigned int cpu)
-{
- struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-
- init_completion(&st->done_up);
- init_completion(&st->done_down);
-}
-
static int cpuhp_should_run(unsigned int cpu)
{
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
@@ -683,17 +1083,9 @@ static void cpuhp_thread_fun(unsigned int cpu)
state = st->cb_state;
st->should_run = false;
} else {
- if (bringup) {
- st->state++;
- state = st->state;
- st->should_run = (st->state < st->target);
- WARN_ON_ONCE(st->state > st->target);
- } else {
- state = st->state;
- st->state--;
- st->should_run = (st->state > st->target);
- WARN_ON_ONCE(st->state < st->target);
- }
+ st->should_run = cpuhp_next_state(bringup, &state, st, st->target);
+ if (!st->should_run)
+ goto end;
}
WARN_ON_ONCE(!cpuhp_is_ap_state(state));
@@ -721,6 +1113,7 @@ static void cpuhp_thread_fun(unsigned int cpu)
st->should_run = false;
}
+end:
cpuhp_lock_release(bringup);
lockdep_release_cpus_lock();
@@ -793,7 +1186,7 @@ static int cpuhp_kick_ap_work(unsigned int cpu)
cpuhp_lock_release(true);
trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
- ret = cpuhp_kick_ap(st, st->target);
+ ret = cpuhp_kick_ap(cpu, st, st->target);
trace_cpuhp_exit(cpu, st->state, prev_state, ret);
return ret;
@@ -801,20 +1194,36 @@ static int cpuhp_kick_ap_work(unsigned int cpu)
static struct smp_hotplug_thread cpuhp_threads = {
.store = &cpuhp_state.thread,
- .create = &cpuhp_create,
.thread_should_run = cpuhp_should_run,
.thread_fn = cpuhp_thread_fun,
.thread_comm = "cpuhp/%u",
.selfparking = true,
};
+static __init void cpuhp_init_state(void)
+{
+ struct cpuhp_cpu_state *st;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ st = per_cpu_ptr(&cpuhp_state, cpu);
+ init_completion(&st->done_up);
+ init_completion(&st->done_down);
+ }
+}
+
void __init cpuhp_threads_init(void)
{
+ cpuhp_init_state();
BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
kthread_unpark(this_cpu_read(cpuhp_state.thread));
}
#ifdef CONFIG_HOTPLUG_CPU
+#ifndef arch_clear_mm_cpumask_cpu
+#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
+#endif
+
/**
* clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
* @cpu: a CPU id
@@ -850,7 +1259,7 @@ void clear_tasks_mm_cpumask(int cpu)
t = find_lock_task_mm(p);
if (!t)
continue;
- cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
+ arch_clear_mm_cpumask_cpu(cpu, t->mm);
task_unlock(t);
}
rcu_read_unlock();
@@ -862,7 +1271,6 @@ static int take_cpu_down(void *_param)
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
int err, cpu = smp_processor_id();
- int ret;
/* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable();
@@ -870,24 +1278,16 @@ static int take_cpu_down(void *_param)
return err;
/*
- * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
- * do this step again.
+ * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going
+ * down, that the current state is CPUHP_TEARDOWN_CPU - 1.
*/
- WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
- st->state--;
- /* Invoke the former CPU_DYING callbacks */
- for (; st->state > target; st->state--) {
- ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
- /*
- * DYING must not fail!
- */
- WARN_ON_ONCE(ret);
- }
+ WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));
+
+ /*
+ * Invoke the former CPU_DYING callbacks. DYING must not fail!
+ */
+ cpuhp_invoke_callback_range_nofail(false, cpu, st, target);
- /* Give up timekeeping duties */
- tick_handover_do_timer();
- /* Remove CPU from timer broadcasting */
- tick_offline_cpu(cpu);
/* Park the stopper thread */
stop_machine_park(cpu);
return 0;
@@ -899,7 +1299,7 @@ static int takedown_cpu(unsigned int cpu)
int err;
/* Park the smpboot threads */
- kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
+ kthread_park(st->thread);
/*
* Prevent irq alloc/free while the dying cpu reorganizes the
@@ -915,7 +1315,7 @@ static int takedown_cpu(unsigned int cpu)
/* CPU refused to die */
irq_unlock_sparse();
/* Unpark the hotplug thread so we can rollback there */
- kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
+ kthread_unpark(st->thread);
return err;
}
BUG_ON(cpu_online(cpu));
@@ -937,8 +1337,17 @@ static int takedown_cpu(unsigned int cpu)
/* This actually kills the CPU. */
__cpu_die(cpu);
- tick_cleanup_dead_cpu(cpu);
+ cpuhp_bp_sync_dead(cpu);
+
+ lockdep_cleanup_dead_cpu(cpu, idle_thread_get(cpu));
+
+ /*
+ * Callbacks must be re-integrated right away to the RCU state machine.
+ * Otherwise an RCU callback could block a further teardown function
+ * waiting for its completion.
+ */
rcutree_migrate_callbacks(cpu);
+
return 0;
}
@@ -954,37 +1363,36 @@ void cpuhp_report_idle_dead(void)
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
BUG_ON(st->state != CPUHP_AP_OFFLINE);
- rcu_report_dead(smp_processor_id());
+ tick_assert_timekeeping_handover();
+ rcutree_report_cpu_dead();
st->state = CPUHP_AP_IDLE_DEAD;
/*
- * We cannot call complete after rcu_report_dead() so we delegate it
+ * We cannot call complete after rcutree_report_cpu_dead() so we delegate it
* to an online cpu.
*/
smp_call_function_single(cpumask_first(cpu_online_mask),
cpuhp_complete_idle_dead, st, 0);
}
-static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
- for (st->state++; st->state < st->target; st->state++)
- cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
-}
-
static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{
enum cpuhp_state prev_state = st->state;
int ret = 0;
- for (; st->state > target; st->state--) {
- ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
- if (ret) {
- st->target = prev_state;
- if (st->state < prev_state)
- undo_cpu_down(cpu, st);
- break;
- }
+ ret = cpuhp_invoke_callback_range(false, cpu, st, target);
+ if (ret) {
+ pr_debug("CPU DOWN failed (%d) CPU %u state %s (%d)\n",
+ ret, cpu, cpuhp_get_step(st->state)->name,
+ st->state);
+
+ cpuhp_reset_state(cpu, st, prev_state);
+
+ if (st->state < prev_state)
+ WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
+ prev_state));
}
+
return ret;
}
@@ -1005,7 +1413,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
cpuhp_tasks_frozen = tasks_frozen;
- prev_state = cpuhp_set_state(st, target);
+ prev_state = cpuhp_set_state(cpu, st, target);
/*
* If the current CPU state is in the range of the AP hotplug thread,
* then we need to kick the thread.
@@ -1034,9 +1442,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
* to do the further cleanups.
*/
ret = cpuhp_down_callbacks(cpu, st, target);
- if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
- cpuhp_reset_state(st, prev_state);
- __cpuhp_kick_ap(st);
+ if (ret && st->state < prev_state) {
+ if (st->state == CPUHP_TEARDOWN_CPU) {
+ cpuhp_reset_state(cpu, st, prev_state);
+ __cpuhp_kick_ap(st);
+ } else {
+ WARN(1, "DEAD callback error for CPU%d", cpu);
+ }
}
out:
@@ -1050,11 +1462,42 @@ out:
return ret;
}
+struct cpu_down_work {
+ unsigned int cpu;
+ enum cpuhp_state target;
+};
+
+static long __cpu_down_maps_locked(void *arg)
+{
+ struct cpu_down_work *work = arg;
+
+ return _cpu_down(work->cpu, 0, work->target);
+}
+
static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
{
+ struct cpu_down_work work = { .cpu = cpu, .target = target, };
+
+ /*
+ * If the platform does not support hotplug, report it explicitly to
+ * differentiate it from a transient offlining failure.
+ */
+ if (cpu_hotplug_offline_disabled)
+ return -EOPNOTSUPP;
if (cpu_hotplug_disabled)
return -EBUSY;
- return _cpu_down(cpu, 0, target);
+
+ /*
+ * Ensure that the control task does not run on the to be offlined
+ * CPU to prevent a deadlock against cfs_b->period_timer.
+ * Also keep at least one housekeeping cpu onlined to avoid generating
+ * an empty sched_domain span.
+ */
+ for_each_cpu_and(cpu, cpu_online_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)) {
+ if (cpu != work.cpu)
+ return work_on_cpu(cpu, __cpu_down_maps_locked, &work);
+ }
+ return -EBUSY;
}
static int cpu_down(unsigned int cpu, enum cpuhp_state target)
@@ -1074,6 +1517,8 @@ static int cpu_down(unsigned int cpu, enum cpuhp_state target)
* This function is meant to be used by device core cpu subsystem only.
*
* Other subsystems should use remove_cpu() instead.
+ *
+ * Return: %0 on success or a negative errno code
*/
int cpu_device_down(struct device *dev)
{
@@ -1149,18 +1594,14 @@ void notify_cpu_starting(unsigned int cpu)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
- int ret;
- rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
+ rcutree_report_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
cpumask_set_cpu(cpu, &cpus_booted_once_mask);
- while (st->state < target) {
- st->state++;
- ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
- /*
- * STARTING must not fail!
- */
- WARN_ON_ONCE(ret);
- }
+
+ /*
+ * STARTING must not fail!
+ */
+ cpuhp_invoke_callback_range_nofail(true, cpu, st, target);
}
/*
@@ -1176,8 +1617,10 @@ void cpuhp_online_idle(enum cpuhp_state state)
if (state != CPUHP_AP_ONLINE_IDLE)
return;
+ cpuhp_ap_update_sync_state(SYNC_STATE_ONLINE);
+
/*
- * Unpart the stopper thread before we start the idle loop (and start
+ * Unpark the stopper thread before we start the idle loop (and start
* scheduling); this ensures the stopper task is always available.
*/
stop_machine_unpark(smp_processor_id());
@@ -1214,11 +1657,17 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
ret = PTR_ERR(idle);
goto out;
}
+
+ /*
+ * Reset stale stack state from the last time this CPU was online.
+ */
+ scs_task_reset(idle);
+ kasan_unpoison_task_stack(idle);
}
cpuhp_tasks_frozen = tasks_frozen;
- cpuhp_set_state(st, target);
+ cpuhp_set_state(cpu, st, target);
/*
* If the current CPU state is in the range of the AP hotplug thread,
* then we need to kick the thread once more.
@@ -1253,9 +1702,6 @@ static int cpu_up(unsigned int cpu, enum cpuhp_state target)
if (!cpu_possible(cpu)) {
pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
cpu);
-#if defined(CONFIG_IA64)
- pr_err("please check additional_cpus= boot parameter\n");
-#endif
return -EINVAL;
}
@@ -1269,7 +1715,7 @@ static int cpu_up(unsigned int cpu, enum cpuhp_state target)
err = -EBUSY;
goto out;
}
- if (!cpu_smt_allowed(cpu)) {
+ if (!cpu_bootable(cpu)) {
err = -EPERM;
goto out;
}
@@ -1287,6 +1733,8 @@ out:
* This function is meant to be used by device core cpu subsystem only.
*
* Other subsystems should use add_cpu() instead.
+ *
+ * Return: %0 on success or a negative errno code
*/
int cpu_device_up(struct device *dev)
{
@@ -1312,6 +1760,8 @@ EXPORT_SYMBOL_GPL(add_cpu);
* On some architectures like arm64, we can hibernate on any CPU, but on
* wake up the CPU we hibernated on might be offline as a side effect of
* using maxcpus= for example.
+ *
+ * Return: %0 on success or a negative errno code
*/
int bringup_hibernate_cpu(unsigned int sleep_cpu)
{
@@ -1328,18 +1778,125 @@ int bringup_hibernate_cpu(unsigned int sleep_cpu)
return 0;
}
-void bringup_nonboot_cpus(unsigned int setup_max_cpus)
+static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int ncpus,
+ enum cpuhp_state target)
{
unsigned int cpu;
- for_each_present_cpu(cpu) {
- if (num_online_cpus() >= setup_max_cpus)
+ for_each_cpu(cpu, mask) {
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+ if (cpu_up(cpu, target) && can_rollback_cpu(st)) {
+ /*
+ * If this failed then cpu_up() might have only
+ * rolled back to CPUHP_BP_KICK_AP for the final
+ * online. Clean it up. NOOP if already rolled back.
+ */
+ WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE));
+ }
+
+ if (!--ncpus)
break;
- if (!cpu_online(cpu))
- cpu_up(cpu, CPUHP_ONLINE);
}
}
+#ifdef CONFIG_HOTPLUG_PARALLEL
+static bool __cpuhp_parallel_bringup __ro_after_init = true;
+
+static int __init parallel_bringup_parse_param(char *arg)
+{
+ return kstrtobool(arg, &__cpuhp_parallel_bringup);
+}
+early_param("cpuhp.parallel", parallel_bringup_parse_param);
+
+#ifdef CONFIG_HOTPLUG_SMT
+static inline bool cpuhp_smt_aware(void)
+{
+ return cpu_smt_max_threads > 1;
+}
+
+static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
+{
+ return cpu_primary_thread_mask;
+}
+#else
+static inline bool cpuhp_smt_aware(void)
+{
+ return false;
+}
+static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
+{
+ return cpu_none_mask;
+}
+#endif
+
+bool __weak arch_cpuhp_init_parallel_bringup(void)
+{
+ return true;
+}
+
+/*
+ * On architectures which have enabled parallel bringup this invokes all BP
+ * prepare states for each of the to be onlined APs first. The last state
+ * sends the startup IPI to the APs. The APs proceed through the low level
+ * bringup code in parallel and then wait for the control CPU to release
+ * them one by one for the final onlining procedure.
+ *
+ * This avoids waiting for each AP to respond to the startup IPI in
+ * CPUHP_BRINGUP_CPU.
+ */
+static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
+{
+ const struct cpumask *mask = cpu_present_mask;
+
+ if (__cpuhp_parallel_bringup)
+ __cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup();
+ if (!__cpuhp_parallel_bringup)
+ return false;
+
+ if (cpuhp_smt_aware()) {
+ const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
+ static struct cpumask tmp_mask __initdata;
+
+ /*
+ * X86 requires to prevent that SMT siblings stopped while
+ * the primary thread does a microcode update for various
+ * reasons. Bring the primary threads up first.
+ */
+ cpumask_and(&tmp_mask, mask, pmask);
+ cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_BP_KICK_AP);
+ cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_ONLINE);
+ /* Account for the online CPUs */
+ ncpus -= num_online_cpus();
+ if (!ncpus)
+ return true;
+ /* Create the mask for secondary CPUs */
+ cpumask_andnot(&tmp_mask, mask, pmask);
+ mask = &tmp_mask;
+ }
+
+ /* Bring the not-yet started CPUs up */
+ cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP);
+ cpuhp_bringup_mask(mask, ncpus, CPUHP_ONLINE);
+ return true;
+}
+#else
+static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; }
+#endif /* CONFIG_HOTPLUG_PARALLEL */
+
+void __init bringup_nonboot_cpus(unsigned int max_cpus)
+{
+ if (!max_cpus)
+ return;
+
+ /* Try parallel bringup optimization if enabled */
+ if (cpuhp_bringup_cpus_parallel(max_cpus))
+ return;
+
+ /* Full per CPU serialized bringup */
+ cpuhp_bringup_mask(cpu_present_mask, max_cpus, CPUHP_ONLINE);
+}
+
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;
@@ -1350,8 +1907,8 @@ int freeze_secondary_cpus(int primary)
cpu_maps_update_begin();
if (primary == -1) {
primary = cpumask_first(cpu_online_mask);
- if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
- primary = housekeeping_any_cpu(HK_FLAG_TIMER);
+ if (!housekeeping_cpu(primary, HK_TYPE_TIMER))
+ primary = housekeeping_any_cpu(HK_TYPE_TIMER);
} else {
if (!cpu_online(primary))
primary = cpumask_first(cpu_online_mask);
@@ -1364,8 +1921,8 @@ int freeze_secondary_cpus(int primary)
cpumask_clear(frozen_cpus);
pr_info("Disabling non-boot CPUs ...\n");
- for_each_online_cpu(cpu) {
- if (cpu == primary)
+ for (cpu = nr_cpu_ids - 1; cpu >= 0; cpu--) {
+ if (!cpu_online(cpu) || cpu == primary)
continue;
if (pm_wakeup_pending()) {
@@ -1521,6 +2078,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = perf_event_init_cpu,
.teardown.single = perf_event_exit_cpu,
},
+ [CPUHP_RANDOM_PREPARE] = {
+ .name = "random:prepare",
+ .startup.single = random_prepare_cpu,
+ .teardown.single = NULL,
+ },
[CPUHP_WORKQUEUE_PREP] = {
.name = "workqueue:prepare",
.startup.single = workqueue_prepare_cpu,
@@ -1529,7 +2091,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
[CPUHP_HRTIMERS_PREPARE] = {
.name = "hrtimers:prepare",
.startup.single = hrtimers_prepare_cpu,
- .teardown.single = hrtimers_dead_cpu,
+ .teardown.single = NULL,
},
[CPUHP_SMPCFD_PREPARE] = {
.name = "smpcfd:prepare",
@@ -1541,11 +2103,6 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = relay_prepare_cpu,
.teardown.single = NULL,
},
- [CPUHP_SLAB_PREPARE] = {
- .name = "slab:prepare",
- .startup.single = slab_prepare_cpu,
- .teardown.single = slab_dead_cpu,
- },
[CPUHP_RCUTREE_PREP] = {
.name = "RCU/tree:prepare",
.startup.single = rcutree_prepare_cpu,
@@ -1561,13 +2118,38 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = timers_prepare_cpu,
.teardown.single = timers_dead_cpu,
},
- /* Kicks the plugged cpu into life */
+
+#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
+ /*
+ * Kicks the AP alive. AP will wait in cpuhp_ap_sync_alive() until
+ * the next step will release it.
+ */
+ [CPUHP_BP_KICK_AP] = {
+ .name = "cpu:kick_ap",
+ .startup.single = cpuhp_kick_ap_alive,
+ },
+
+ /*
+ * Waits for the AP to reach cpuhp_ap_sync_alive() and then
+ * releases it for the complete bringup.
+ */
+ [CPUHP_BRINGUP_CPU] = {
+ .name = "cpu:bringup",
+ .startup.single = cpuhp_bringup_ap,
+ .teardown.single = finish_cpu,
+ .cant_stop = true,
+ },
+#else
+ /*
+ * All-in-one CPU bringup state which includes the kick alive.
+ */
[CPUHP_BRINGUP_CPU] = {
.name = "cpu:bringup",
.startup.single = bringup_cpu,
.teardown.single = finish_cpu,
.cant_stop = true,
},
+#endif
/* Final state before CPU kills itself */
[CPUHP_AP_IDLE_DEAD] = {
.name = "idle:dead",
@@ -1596,13 +2178,23 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = NULL,
.teardown.single = smpcfd_dying_cpu,
},
+ [CPUHP_AP_HRTIMERS_DYING] = {
+ .name = "hrtimers:dying",
+ .startup.single = hrtimers_cpu_starting,
+ .teardown.single = hrtimers_cpu_dying,
+ },
+ [CPUHP_AP_TICK_DYING] = {
+ .name = "tick:dying",
+ .startup.single = NULL,
+ .teardown.single = tick_cpu_dying,
+ },
/* Entry state on starting. Interrupts enabled from here on. Transient
* state for synchronsization */
[CPUHP_AP_ONLINE] = {
.name = "ap:online",
},
/*
- * Handled on controll processor until the plugged processor manages
+ * Handled on control processor until the plugged processor manages
* this itself.
*/
[CPUHP_TEARDOWN_CPU] = {
@@ -1611,6 +2203,13 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.teardown.single = takedown_cpu,
.cant_stop = true,
},
+
+ [CPUHP_AP_SCHED_WAIT_EMPTY] = {
+ .name = "sched:waitempty",
+ .startup.single = NULL,
+ .teardown.single = sched_cpu_wait_empty,
+ },
+
/* Handle smpboot threads park/unpark */
[CPUHP_AP_SMPBOOT_THREADS] = {
.name = "smpboot/threads:online",
@@ -1637,6 +2236,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = workqueue_online_cpu,
.teardown.single = workqueue_offline_cpu,
},
+ [CPUHP_AP_RANDOM_ONLINE] = {
+ .name = "random:online",
+ .startup.single = random_online_cpu,
+ .teardown.single = NULL,
+ },
[CPUHP_AP_RCUTREE_ONLINE] = {
.name = "RCU/tree:online",
.startup.single = rcutree_online_cpu,
@@ -1759,8 +2363,7 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
* If there's nothing to do, we done.
* Relies on the union for multi_instance.
*/
- if ((bringup && !sp->startup.single) ||
- (!bringup && !sp->teardown.single))
+ if (cpuhp_step_empty(bringup, sp))
return 0;
/*
* The non AP bound callbacks can fail on bringup. On teardown
@@ -1862,6 +2465,7 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
/**
* __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
* @state: The state to setup
+ * @name: Name of the step
* @invoke: If true, the startup function is invoked for cpus where
* cpu state >= @state
* @startup: startup callback function
@@ -1870,9 +2474,9 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
* added afterwards.
*
* The caller needs to hold cpus read locked while calling this function.
- * Returns:
+ * Return:
* On success:
- * Positive state number if @state is CPUHP_AP_ONLINE_DYN
+ * Positive state number if @state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN;
* 0 for all other states
* On failure: proper (negative) error code
*/
@@ -1895,7 +2499,7 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
ret = cpuhp_store_callbacks(state, name, startup, teardown,
multi_instance);
- dynstate = state == CPUHP_AP_ONLINE_DYN;
+ dynstate = state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN;
if (ret > 0 && dynstate) {
state = ret;
ret = 0;
@@ -1926,8 +2530,8 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
out:
mutex_unlock(&cpuhp_state_mutex);
/*
- * If the requested state is CPUHP_AP_ONLINE_DYN, return the
- * dynamically allocated state in case of success.
+ * If the requested state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN,
+ * return the dynamically allocated state in case of success.
*/
if (!ret && dynstate)
return state;
@@ -2072,6 +2676,12 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
for_each_online_cpu(cpu) {
if (topology_is_primary_thread(cpu))
continue;
+ /*
+ * Disable can be called with CPU_SMT_ENABLED when changing
+ * from a higher to lower number of SMT threads per core.
+ */
+ if (ctrlval == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
+ continue;
ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
if (ret)
break;
@@ -2096,6 +2706,14 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
return ret;
}
+/* Check if the core a CPU belongs to is online */
+#if !defined(topology_is_core_online)
+static inline bool topology_is_core_online(unsigned int cpu)
+{
+ return true;
+}
+#endif
+
int cpuhp_smt_enable(void)
{
int cpu, ret = 0;
@@ -2106,6 +2724,8 @@ int cpuhp_smt_enable(void)
/* Skip online CPUs and CPUs on offline nodes */
if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
continue;
+ if (!cpu_smt_thread_allowed(cpu) || !topology_is_core_online(cpu))
+ continue;
ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
if (ret)
break;
@@ -2118,18 +2738,17 @@ int cpuhp_smt_enable(void)
#endif
#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
-static ssize_t show_cpuhp_state(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
return sprintf(buf, "%d\n", st->state);
}
-static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
+static DEVICE_ATTR_RO(state);
-static ssize_t write_cpuhp_target(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t target_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
struct cpuhp_step *sp;
@@ -2160,26 +2779,26 @@ static ssize_t write_cpuhp_target(struct device *dev,
if (st->state < target)
ret = cpu_up(dev->id, target);
- else
+ else if (st->state > target)
ret = cpu_down(dev->id, target);
+ else if (WARN_ON(st->target != target))
+ st->target = target;
out:
unlock_device_hotplug();
return ret ? ret : count;
}
-static ssize_t show_cpuhp_target(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t target_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
return sprintf(buf, "%d\n", st->target);
}
-static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
+static DEVICE_ATTR_RW(target);
-
-static ssize_t write_cpuhp_fail(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t fail_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
struct cpuhp_step *sp;
@@ -2189,6 +2808,11 @@ static ssize_t write_cpuhp_fail(struct device *dev,
if (ret)
return ret;
+ if (fail == CPUHP_INVALID) {
+ st->fail = fail;
+ return count;
+ }
+
if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
return -EINVAL;
@@ -2199,6 +2823,15 @@ static ssize_t write_cpuhp_fail(struct device *dev,
return -EINVAL;
/*
+ * DEAD callbacks cannot fail...
+ * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter
+ * triggering STARTING callbacks, a failure in this state would
+ * hinder rollback.
+ */
+ if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU)
+ return -EINVAL;
+
+ /*
* Cannot fail anything that doesn't have callbacks.
*/
mutex_lock(&cpuhp_state_mutex);
@@ -2214,15 +2847,15 @@ static ssize_t write_cpuhp_fail(struct device *dev,
return count;
}
-static ssize_t show_cpuhp_fail(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t fail_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
return sprintf(buf, "%d\n", st->fail);
}
-static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
+static DEVICE_ATTR_RW(fail);
static struct attribute *cpuhp_cpu_attrs[] = {
&dev_attr_state.attr,
@@ -2234,10 +2867,9 @@ static struct attribute *cpuhp_cpu_attrs[] = {
static const struct attribute_group cpuhp_cpu_attr_group = {
.attrs = cpuhp_cpu_attrs,
.name = "hotplug",
- NULL
};
-static ssize_t show_cpuhp_states(struct device *dev,
+static ssize_t states_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
ssize_t cur, res = 0;
@@ -2256,7 +2888,7 @@ static ssize_t show_cpuhp_states(struct device *dev,
mutex_unlock(&cpuhp_state_mutex);
return res;
}
-static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
+static DEVICE_ATTR_RO(states);
static struct attribute *cpuhp_cpu_root_attrs[] = {
&dev_attr_states.attr,
@@ -2266,25 +2898,23 @@ static struct attribute *cpuhp_cpu_root_attrs[] = {
static const struct attribute_group cpuhp_cpu_root_attr_group = {
.attrs = cpuhp_cpu_root_attrs,
.name = "hotplug",
- NULL
};
#ifdef CONFIG_HOTPLUG_SMT
+static bool cpu_smt_num_threads_valid(unsigned int threads)
+{
+ if (IS_ENABLED(CONFIG_SMT_NUM_THREADS_DYNAMIC))
+ return threads >= 1 && threads <= cpu_smt_max_threads;
+ return threads == 1 || threads == cpu_smt_max_threads;
+}
+
static ssize_t
__store_smt_control(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
- int ctrlval, ret;
-
- if (sysfs_streq(buf, "on"))
- ctrlval = CPU_SMT_ENABLED;
- else if (sysfs_streq(buf, "off"))
- ctrlval = CPU_SMT_DISABLED;
- else if (sysfs_streq(buf, "forceoff"))
- ctrlval = CPU_SMT_FORCE_DISABLED;
- else
- return -EINVAL;
+ int ctrlval, ret, num_threads, orig_threads;
+ bool force_off;
if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
return -EPERM;
@@ -2292,21 +2922,39 @@ __store_smt_control(struct device *dev, struct device_attribute *attr,
if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
return -ENODEV;
+ if (sysfs_streq(buf, "on")) {
+ ctrlval = CPU_SMT_ENABLED;
+ num_threads = cpu_smt_max_threads;
+ } else if (sysfs_streq(buf, "off")) {
+ ctrlval = CPU_SMT_DISABLED;
+ num_threads = 1;
+ } else if (sysfs_streq(buf, "forceoff")) {
+ ctrlval = CPU_SMT_FORCE_DISABLED;
+ num_threads = 1;
+ } else if (kstrtoint(buf, 10, &num_threads) == 0) {
+ if (num_threads == 1)
+ ctrlval = CPU_SMT_DISABLED;
+ else if (cpu_smt_num_threads_valid(num_threads))
+ ctrlval = CPU_SMT_ENABLED;
+ else
+ return -EINVAL;
+ } else {
+ return -EINVAL;
+ }
+
ret = lock_device_hotplug_sysfs();
if (ret)
return ret;
- if (ctrlval != cpu_smt_control) {
- switch (ctrlval) {
- case CPU_SMT_ENABLED:
- ret = cpuhp_smt_enable();
- break;
- case CPU_SMT_DISABLED:
- case CPU_SMT_FORCE_DISABLED:
- ret = cpuhp_smt_disable(ctrlval);
- break;
- }
- }
+ orig_threads = cpu_smt_num_threads;
+ cpu_smt_num_threads = num_threads;
+
+ force_off = ctrlval != cpu_smt_control && ctrlval == CPU_SMT_FORCE_DISABLED;
+
+ if (num_threads > orig_threads)
+ ret = cpuhp_smt_enable();
+ else if (num_threads < orig_threads || force_off)
+ ret = cpuhp_smt_disable(ctrlval);
unlock_device_hotplug();
return ret ? ret : count;
@@ -2329,28 +2977,38 @@ static const char *smt_states[] = {
[CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
};
-static ssize_t
-show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t control_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
const char *state = smt_states[cpu_smt_control];
- return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
+#ifdef CONFIG_HOTPLUG_SMT
+ /*
+ * If SMT is enabled but not all threads are enabled then show the
+ * number of threads. If all threads are enabled show "on". Otherwise
+ * show the state name.
+ */
+ if (cpu_smt_control == CPU_SMT_ENABLED &&
+ cpu_smt_num_threads != cpu_smt_max_threads)
+ return sysfs_emit(buf, "%d\n", cpu_smt_num_threads);
+#endif
+
+ return sysfs_emit(buf, "%s\n", state);
}
-static ssize_t
-store_smt_control(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t control_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
return __store_smt_control(dev, attr, buf, count);
}
-static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
+static DEVICE_ATTR_RW(control);
-static ssize_t
-show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t active_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
+ return sysfs_emit(buf, "%d\n", sched_smt_active());
}
-static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
+static DEVICE_ATTR_RO(active);
static struct attribute *cpuhp_smt_attrs[] = {
&dev_attr_control.attr,
@@ -2361,27 +3019,37 @@ static struct attribute *cpuhp_smt_attrs[] = {
static const struct attribute_group cpuhp_smt_attr_group = {
.attrs = cpuhp_smt_attrs,
.name = "smt",
- NULL
};
static int __init cpu_smt_sysfs_init(void)
{
- return sysfs_create_group(&cpu_subsys.dev_root->kobj,
- &cpuhp_smt_attr_group);
+ struct device *dev_root;
+ int ret = -ENODEV;
+
+ dev_root = bus_get_dev_root(&cpu_subsys);
+ if (dev_root) {
+ ret = sysfs_create_group(&dev_root->kobj, &cpuhp_smt_attr_group);
+ put_device(dev_root);
+ }
+ return ret;
}
static int __init cpuhp_sysfs_init(void)
{
+ struct device *dev_root;
int cpu, ret;
ret = cpu_smt_sysfs_init();
if (ret)
return ret;
- ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
- &cpuhp_cpu_root_attr_group);
- if (ret)
- return ret;
+ dev_root = bus_get_dev_root(&cpu_subsys);
+ if (dev_root) {
+ ret = sysfs_create_group(&dev_root->kobj, &cpuhp_cpu_root_attr_group);
+ put_device(dev_root);
+ if (ret)
+ return ret;
+ }
for_each_possible_cpu(cpu) {
struct device *dev = get_cpu_device(cpu);
@@ -2426,22 +3094,28 @@ const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
#ifdef CONFIG_INIT_ALL_POSSIBLE
-struct cpumask __cpu_possible_mask __read_mostly
+struct cpumask __cpu_possible_mask __ro_after_init
= {CPU_BITS_ALL};
#else
-struct cpumask __cpu_possible_mask __read_mostly;
+struct cpumask __cpu_possible_mask __ro_after_init;
#endif
EXPORT_SYMBOL(__cpu_possible_mask);
struct cpumask __cpu_online_mask __read_mostly;
EXPORT_SYMBOL(__cpu_online_mask);
+struct cpumask __cpu_enabled_mask __read_mostly;
+EXPORT_SYMBOL(__cpu_enabled_mask);
+
struct cpumask __cpu_present_mask __read_mostly;
EXPORT_SYMBOL(__cpu_present_mask);
struct cpumask __cpu_active_mask __read_mostly;
EXPORT_SYMBOL(__cpu_active_mask);
+struct cpumask __cpu_dying_mask __read_mostly;
+EXPORT_SYMBOL(__cpu_dying_mask);
+
atomic_t __num_online_cpus __read_mostly;
EXPORT_SYMBOL(__num_online_cpus);
@@ -2455,11 +3129,6 @@ void init_cpu_possible(const struct cpumask *src)
cpumask_copy(&__cpu_possible_mask, src);
}
-void init_cpu_online(const struct cpumask *src)
-{
- cpumask_copy(&__cpu_online_mask, src);
-}
-
void set_cpu_online(unsigned int cpu, bool online)
{
/*
@@ -2506,10 +3175,13 @@ void __init boot_cpu_hotplug_init(void)
{
#ifdef CONFIG_SMP
cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
+ atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), SYNC_STATE_ONLINE);
#endif
this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
+ this_cpu_write(cpuhp_state.target, CPUHP_ONLINE);
}
+#ifdef CONFIG_CPU_MITIGATIONS
/*
* These are used for a global "mitigations=" cmdline option for toggling
* optional CPU mitigations.
@@ -2520,8 +3192,7 @@ enum cpu_mitigations {
CPU_MITIGATIONS_AUTO_NOSMT,
};
-static enum cpu_mitigations cpu_mitigations __ro_after_init =
- CPU_MITIGATIONS_AUTO;
+static enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
static int __init mitigations_parse_cmdline(char *arg)
{
@@ -2537,7 +3208,6 @@ static int __init mitigations_parse_cmdline(char *arg)
return 0;
}
-early_param("mitigations", mitigations_parse_cmdline);
/* mitigations=off */
bool cpu_mitigations_off(void)
@@ -2552,3 +3222,11 @@ bool cpu_mitigations_auto_nosmt(void)
return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
}
EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
+#else
+static int __init mitigations_parse_cmdline(char *arg)
+{
+ pr_crit("Kernel compiled without mitigations, ignoring 'mitigations'; system may still be vulnerable\n");
+ return 0;
+}
+#endif
+early_param("mitigations", mitigations_parse_cmdline);