diff options
Diffstat (limited to 'kernel/cpu.c')
-rw-r--r-- | kernel/cpu.c | 1130 |
1 files changed, 904 insertions, 226 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c index 6ff2578ecf17..07455d25329c 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -17,6 +17,7 @@ #include <linux/cpu.h> #include <linux/oom.h> #include <linux/rcupdate.h> +#include <linux/delay.h> #include <linux/export.h> #include <linux/bug.h> #include <linux/kthread.h> @@ -31,7 +32,11 @@ #include <linux/smpboot.h> #include <linux/relay.h> #include <linux/slab.h> +#include <linux/scs.h> #include <linux/percpu-rwsem.h> +#include <linux/cpuset.h> +#include <linux/random.h> +#include <linux/cc_platform.h> #include <trace/events/power.h> #define CREATE_TRACE_POINTS @@ -40,16 +45,21 @@ #include "smpboot.h" /** - * cpuhp_cpu_state - Per cpu hotplug state storage + * struct cpuhp_cpu_state - Per cpu hotplug state storage * @state: The current cpu state * @target: The target state + * @fail: Current CPU hotplug callback state * @thread: Pointer to the hotplug thread * @should_run: Thread should execute * @rollback: Perform a rollback * @single: Single callback invocation * @bringup: Single callback bringup or teardown selector + * @node: Remote CPU node; for multi-instance, do a + * single entry callback for install/remove + * @last: For multi-instance rollback, remember how far we got * @cb_state: The state for a single callback (install/uninstall) * @result: Result of the operation + * @ap_sync_state: State for AP synchronization * @done_up: Signal completion to the issuer of the task for cpu-up * @done_down: Signal completion to the issuer of the task for cpu-down */ @@ -67,6 +77,7 @@ struct cpuhp_cpu_state { struct hlist_node *last; enum cpuhp_state cb_state; int result; + atomic_t ap_sync_state; struct completion done_up; struct completion done_down; #endif @@ -104,11 +115,12 @@ static inline void cpuhp_lock_release(bool bringup) { } #endif /** - * cpuhp_step - Hotplug state machine step + * struct cpuhp_step - Hotplug state machine step * @name: Name of the step * @startup: Startup function of the step * @teardown: Teardown function of the step * @cant_stop: Bringup/teardown can't be stopped at this step + * @multi_instance: State has multiple instances which get added afterwards */ struct cpuhp_step { const char *name; @@ -122,7 +134,9 @@ struct cpuhp_step { int (*multi)(unsigned int cpu, struct hlist_node *node); } teardown; + /* private: */ struct hlist_head list; + /* public: */ bool cant_stop; bool multi_instance; }; @@ -135,8 +149,13 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state) return cpuhp_hp_states + state; } +static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step) +{ + return bringup ? !step->startup.single : !step->teardown.single; +} + /** - * cpuhp_invoke_callback _ Invoke the callbacks for a given state + * cpuhp_invoke_callback - Invoke the callbacks for a given state * @cpu: The cpu for which the callback should be invoked * @state: The state to do callbacks for * @bringup: True if the bringup callback should be invoked @@ -144,6 +163,8 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state) * @lastp: For multi-instance rollback, remember how far we got * * Called from cpu hotplug and from the state register machinery. + * + * Return: %0 on success or a negative errno code */ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, bool bringup, struct hlist_node *node, @@ -157,26 +178,24 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, if (st->fail == state) { st->fail = CPUHP_INVALID; - - if (!(bringup ? step->startup.single : step->teardown.single)) - return 0; - return -EAGAIN; } + if (cpuhp_step_empty(bringup, step)) { + WARN_ON_ONCE(1); + return 0; + } + if (!step->multi_instance) { WARN_ON_ONCE(lastp && *lastp); cb = bringup ? step->startup.single : step->teardown.single; - if (!cb) - return 0; + trace_cpuhp_enter(cpu, st->target, state, cb); ret = cb(cpu); trace_cpuhp_exit(cpu, st->state, state, ret); return ret; } cbm = bringup ? step->startup.multi : step->teardown.multi; - if (!cbm) - return 0; /* Single invocation for instance add/remove */ if (node) { @@ -259,6 +278,182 @@ static bool cpuhp_is_atomic_state(enum cpuhp_state state) return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE; } +/* Synchronization state management */ +enum cpuhp_sync_state { + SYNC_STATE_DEAD, + SYNC_STATE_KICKED, + SYNC_STATE_SHOULD_DIE, + SYNC_STATE_ALIVE, + SYNC_STATE_SHOULD_ONLINE, + SYNC_STATE_ONLINE, +}; + +#ifdef CONFIG_HOTPLUG_CORE_SYNC +/** + * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown + * @state: The synchronization state to set + * + * No synchronization point. Just update of the synchronization state, but implies + * a full barrier so that the AP changes are visible before the control CPU proceeds. + */ +static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) +{ + atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state); + + (void)atomic_xchg(st, state); +} + +void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); } + +static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state, + enum cpuhp_sync_state next_state) +{ + atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); + ktime_t now, end, start = ktime_get(); + int sync; + + end = start + 10ULL * NSEC_PER_SEC; + + sync = atomic_read(st); + while (1) { + if (sync == state) { + if (!atomic_try_cmpxchg(st, &sync, next_state)) + continue; + return true; + } + + now = ktime_get(); + if (now > end) { + /* Timeout. Leave the state unchanged */ + return false; + } else if (now - start < NSEC_PER_MSEC) { + /* Poll for one millisecond */ + arch_cpuhp_sync_state_poll(); + } else { + usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC); + } + sync = atomic_read(st); + } + return true; +} +#else /* CONFIG_HOTPLUG_CORE_SYNC */ +static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { } +#endif /* !CONFIG_HOTPLUG_CORE_SYNC */ + +#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD +/** + * cpuhp_ap_report_dead - Update synchronization state to DEAD + * + * No synchronization point. Just update of the synchronization state. + */ +void cpuhp_ap_report_dead(void) +{ + cpuhp_ap_update_sync_state(SYNC_STATE_DEAD); +} + +void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { } + +/* + * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down + * because the AP cannot issue complete() at this stage. + */ +static void cpuhp_bp_sync_dead(unsigned int cpu) +{ + atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); + int sync = atomic_read(st); + + do { + /* CPU can have reported dead already. Don't overwrite that! */ + if (sync == SYNC_STATE_DEAD) + break; + } while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE)); + + if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) { + /* CPU reached dead state. Invoke the cleanup function */ + arch_cpuhp_cleanup_dead_cpu(cpu); + return; + } + + /* No further action possible. Emit message and give up. */ + pr_err("CPU%u failed to report dead state\n", cpu); +} +#else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */ +static inline void cpuhp_bp_sync_dead(unsigned int cpu) { } +#endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */ + +#ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL +/** + * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive + * + * Updates the AP synchronization state to SYNC_STATE_ALIVE and waits + * for the BP to release it. + */ +void cpuhp_ap_sync_alive(void) +{ + atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state); + + cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE); + + /* Wait for the control CPU to release it. */ + while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE) + cpu_relax(); +} + +static bool cpuhp_can_boot_ap(unsigned int cpu) +{ + atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); + int sync = atomic_read(st); + +again: + switch (sync) { + case SYNC_STATE_DEAD: + /* CPU is properly dead */ + break; + case SYNC_STATE_KICKED: + /* CPU did not come up in previous attempt */ + break; + case SYNC_STATE_ALIVE: + /* CPU is stuck cpuhp_ap_sync_alive(). */ + break; + default: + /* CPU failed to report online or dead and is in limbo state. */ + return false; + } + + /* Prepare for booting */ + if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED)) + goto again; + + return true; +} + +void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { } + +/* + * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up + * because the AP cannot issue complete() so early in the bringup. + */ +static int cpuhp_bp_sync_alive(unsigned int cpu) +{ + int ret = 0; + + if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL)) + return 0; + + if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) { + pr_err("CPU%u failed to report alive state\n", cpu); + ret = -EIO; + } + + /* Let the architecture cleanup the kick alive mechanics. */ + arch_cpuhp_cleanup_kick_cpu(cpu); + return ret; +} +#else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */ +static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; } +static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; } +#endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */ + /* Serializes the updates to cpu_online_mask, cpu_present_mask */ static DEFINE_MUTEX(cpu_add_remove_lock); bool cpuhp_tasks_frozen; @@ -288,6 +483,8 @@ static int cpu_hotplug_disabled; DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock); +static bool cpu_hotplug_offline_disabled __ro_after_init; + void cpus_read_lock(void) { percpu_down_read(&cpu_hotplug_lock); @@ -330,6 +527,13 @@ void lockdep_assert_cpus_held(void) percpu_rwsem_assert_held(&cpu_hotplug_lock); } +#ifdef CONFIG_LOCKDEP +int lockdep_is_cpus_held(void) +{ + return percpu_rwsem_is_held(&cpu_hotplug_lock); +} +#endif + static void lockdep_acquire_cpus_lock(void) { rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_); @@ -340,6 +544,14 @@ static void lockdep_release_cpus_lock(void) rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_); } +/* Declare CPU offlining not supported */ +void cpu_hotplug_disable_offlining(void) +{ + cpu_maps_update_begin(); + cpu_hotplug_offline_disabled = true; + cpu_maps_update_done(); +} + /* * Wait for currently running CPU hotplug operations to complete (if any) and * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects @@ -389,7 +601,10 @@ static void lockdep_release_cpus_lock(void) void __weak arch_smt_update(void) { } #ifdef CONFIG_HOTPLUG_SMT + enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; +static unsigned int cpu_smt_max_threads __ro_after_init; +unsigned int cpu_smt_num_threads __read_mostly = UINT_MAX; void __init cpu_smt_disable(bool force) { @@ -403,16 +618,33 @@ void __init cpu_smt_disable(bool force) pr_info("SMT: disabled\n"); cpu_smt_control = CPU_SMT_DISABLED; } + cpu_smt_num_threads = 1; } /* * The decision whether SMT is supported can only be done after the full * CPU identification. Called from architecture code. */ -void __init cpu_smt_check_topology(void) +void __init cpu_smt_set_num_threads(unsigned int num_threads, + unsigned int max_threads) { - if (!topology_smt_supported()) + WARN_ON(!num_threads || (num_threads > max_threads)); + + if (max_threads == 1) cpu_smt_control = CPU_SMT_NOT_SUPPORTED; + + cpu_smt_max_threads = max_threads; + + /* + * If SMT has been disabled via the kernel command line or SMT is + * not supported, set cpu_smt_num_threads to 1 for consistency. + * If enabled, take the architecture requested number of threads + * to bring up into account. + */ + if (cpu_smt_control != CPU_SMT_ENABLED) + cpu_smt_num_threads = 1; + else if (num_threads < cpu_smt_num_threads) + cpu_smt_num_threads = num_threads; } static int __init smt_cmdline_disable(char *str) @@ -422,9 +654,31 @@ static int __init smt_cmdline_disable(char *str) } early_param("nosmt", smt_cmdline_disable); -static inline bool cpu_smt_allowed(unsigned int cpu) +/* + * For Archicture supporting partial SMT states check if the thread is allowed. + * Otherwise this has already been checked through cpu_smt_max_threads when + * setting the SMT level. + */ +static inline bool cpu_smt_thread_allowed(unsigned int cpu) { - if (cpu_smt_control == CPU_SMT_ENABLED) +#ifdef CONFIG_SMT_NUM_THREADS_DYNAMIC + return topology_smt_thread_allowed(cpu); +#else + return true; +#endif +} + +static inline bool cpu_bootable(unsigned int cpu) +{ + if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu)) + return true; + + /* All CPUs are bootable if controls are not configured */ + if (cpu_smt_control == CPU_SMT_NOT_IMPLEMENTED) + return true; + + /* All CPUs are bootable if CPU is not SMT capable */ + if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED) return true; if (topology_is_primary_thread(cpu)) @@ -439,35 +693,51 @@ static inline bool cpu_smt_allowed(unsigned int cpu) return !cpumask_test_cpu(cpu, &cpus_booted_once_mask); } -/* Returns true if SMT is not supported of forcefully (irreversibly) disabled */ +/* Returns true if SMT is supported and not forcefully (irreversibly) disabled */ bool cpu_smt_possible(void) { return cpu_smt_control != CPU_SMT_FORCE_DISABLED && cpu_smt_control != CPU_SMT_NOT_SUPPORTED; } EXPORT_SYMBOL_GPL(cpu_smt_possible); + #else -static inline bool cpu_smt_allowed(unsigned int cpu) { return true; } +static inline bool cpu_bootable(unsigned int cpu) { return true; } #endif static inline enum cpuhp_state -cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) +cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { enum cpuhp_state prev_state = st->state; + bool bringup = st->state < target; st->rollback = false; st->last = NULL; st->target = target; st->single = false; - st->bringup = st->state < target; + st->bringup = bringup; + if (cpu_dying(cpu) != !bringup) + set_cpu_dying(cpu, !bringup); return prev_state; } static inline void -cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) +cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st, + enum cpuhp_state prev_state) { + bool bringup = !st->bringup; + + st->target = prev_state; + + /* + * Already rolling back. No need invert the bringup value or to change + * the current state. + */ + if (st->rollback) + return; + st->rollback = true; /* @@ -481,8 +751,9 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) st->state++; } - st->target = prev_state; - st->bringup = !st->bringup; + st->bringup = bringup; + if (cpu_dying(cpu) != !bringup) + set_cpu_dying(cpu, !bringup); } /* Regular hotplug invocation of the AP hotplug thread */ @@ -502,22 +773,23 @@ static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st) wait_for_ap_thread(st, st->bringup); } -static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target) +static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st, + enum cpuhp_state target) { enum cpuhp_state prev_state; int ret; - prev_state = cpuhp_set_state(st, target); + prev_state = cpuhp_set_state(cpu, st, target); __cpuhp_kick_ap(st); if ((ret = st->result)) { - cpuhp_reset_state(st, prev_state); + cpuhp_reset_state(cpu, st, prev_state); __cpuhp_kick_ap(st); } return ret; } -static int bringup_wait_for_ap(unsigned int cpu) +static int bringup_wait_for_ap_online(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); @@ -533,37 +805,99 @@ static int bringup_wait_for_ap(unsigned int cpu) * SMT soft disabling on X86 requires to bring the CPU out of the * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The * CPU marked itself as booted_once in notify_cpu_starting() so the - * cpu_smt_allowed() check will now return false if this is not the + * cpu_bootable() check will now return false if this is not the * primary sibling. */ - if (!cpu_smt_allowed(cpu)) + if (!cpu_bootable(cpu)) return -ECANCELED; + return 0; +} + +#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP +static int cpuhp_kick_ap_alive(unsigned int cpu) +{ + if (!cpuhp_can_boot_ap(cpu)) + return -EAGAIN; + + return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu)); +} + +static int cpuhp_bringup_ap(unsigned int cpu) +{ + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); + int ret; + + /* + * Some architectures have to walk the irq descriptors to + * setup the vector space for the cpu which comes online. + * Prevent irq alloc/free across the bringup. + */ + irq_lock_sparse(); + + ret = cpuhp_bp_sync_alive(cpu); + if (ret) + goto out_unlock; + + ret = bringup_wait_for_ap_online(cpu); + if (ret) + goto out_unlock; + + irq_unlock_sparse(); if (st->target <= CPUHP_AP_ONLINE_IDLE) return 0; - return cpuhp_kick_ap(st, st->target); -} + return cpuhp_kick_ap(cpu, st, st->target); +out_unlock: + irq_unlock_sparse(); + return ret; +} +#else static int bringup_cpu(unsigned int cpu) { + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); struct task_struct *idle = idle_thread_get(cpu); int ret; + if (!cpuhp_can_boot_ap(cpu)) + return -EAGAIN; + /* * Some architectures have to walk the irq descriptors to * setup the vector space for the cpu which comes online. - * Prevent irq alloc/free across the bringup. + * + * Prevent irq alloc/free across the bringup by acquiring the + * sparse irq lock. Hold it until the upcoming CPU completes the + * startup in cpuhp_online_idle() which allows to avoid + * intermediate synchronization points in the architecture code. */ irq_lock_sparse(); - /* Arch-specific enabling code. */ ret = __cpu_up(cpu, idle); - irq_unlock_sparse(); if (ret) - return ret; - return bringup_wait_for_ap(cpu); + goto out_unlock; + + ret = cpuhp_bp_sync_alive(cpu); + if (ret) + goto out_unlock; + + ret = bringup_wait_for_ap_online(cpu); + if (ret) + goto out_unlock; + + irq_unlock_sparse(); + + if (st->target <= CPUHP_AP_ONLINE_IDLE) + return 0; + + return cpuhp_kick_ap(cpu, st, st->target); + +out_unlock: + irq_unlock_sparse(); + return ret; } +#endif static int finish_cpu(unsigned int cpu) { @@ -571,12 +905,13 @@ static int finish_cpu(unsigned int cpu) struct mm_struct *mm = idle->active_mm; /* - * idle_task_exit() will have switched to &init_mm, now - * clean up any remaining active_mm state. + * sched_force_init_mm() ensured the use of &init_mm, + * drop that refcount now that the CPU has stopped. */ - if (mm != &init_mm) - idle->active_mm = &init_mm; - mmdrop(mm); + WARN_ON(mm != &init_mm); + idle->active_mm = NULL; + mmdrop_lazy_tlb(mm); + return 0; } @@ -584,10 +919,83 @@ static int finish_cpu(unsigned int cpu) * Hotplug state machine related functions */ -static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st) +/* + * Get the next state to run. Empty ones will be skipped. Returns true if a + * state must be run. + * + * st->state will be modified ahead of time, to match state_to_run, as if it + * has already ran. + */ +static bool cpuhp_next_state(bool bringup, + enum cpuhp_state *state_to_run, + struct cpuhp_cpu_state *st, + enum cpuhp_state target) +{ + do { + if (bringup) { + if (st->state >= target) + return false; + + *state_to_run = ++st->state; + } else { + if (st->state <= target) + return false; + + *state_to_run = st->state--; + } + + if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run))) + break; + } while (true); + + return true; +} + +static int __cpuhp_invoke_callback_range(bool bringup, + unsigned int cpu, + struct cpuhp_cpu_state *st, + enum cpuhp_state target, + bool nofail) +{ + enum cpuhp_state state; + int ret = 0; + + while (cpuhp_next_state(bringup, &state, st, target)) { + int err; + + err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL); + if (!err) + continue; + + if (nofail) { + pr_warn("CPU %u %s state %s (%d) failed (%d)\n", + cpu, bringup ? "UP" : "DOWN", + cpuhp_get_step(st->state)->name, + st->state, err); + ret = -1; + } else { + ret = err; + break; + } + } + + return ret; +} + +static inline int cpuhp_invoke_callback_range(bool bringup, + unsigned int cpu, + struct cpuhp_cpu_state *st, + enum cpuhp_state target) { - for (st->state--; st->state > st->target; st->state--) - cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); + return __cpuhp_invoke_callback_range(bringup, cpu, st, target, false); +} + +static inline void cpuhp_invoke_callback_range_nofail(bool bringup, + unsigned int cpu, + struct cpuhp_cpu_state *st, + enum cpuhp_state target) +{ + __cpuhp_invoke_callback_range(bringup, cpu, st, target, true); } static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st) @@ -610,16 +1018,16 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state prev_state = st->state; int ret = 0; - while (st->state < target) { - st->state++; - ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); - if (ret) { - if (can_rollback_cpu(st)) { - st->target = prev_state; - undo_cpu_up(cpu, st); - } - break; - } + ret = cpuhp_invoke_callback_range(true, cpu, st, target); + if (ret) { + pr_debug("CPU UP failed (%d) CPU %u state %s (%d)\n", + ret, cpu, cpuhp_get_step(st->state)->name, + st->state); + + cpuhp_reset_state(cpu, st, prev_state); + if (can_rollback_cpu(st)) + WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, + prev_state)); } return ret; } @@ -627,14 +1035,6 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, /* * The cpu hotplug threads manage the bringup and teardown of the cpus */ -static void cpuhp_create(unsigned int cpu) -{ - struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); - - init_completion(&st->done_up); - init_completion(&st->done_down); -} - static int cpuhp_should_run(unsigned int cpu) { struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); @@ -683,17 +1083,9 @@ static void cpuhp_thread_fun(unsigned int cpu) state = st->cb_state; st->should_run = false; } else { - if (bringup) { - st->state++; - state = st->state; - st->should_run = (st->state < st->target); - WARN_ON_ONCE(st->state > st->target); - } else { - state = st->state; - st->state--; - st->should_run = (st->state > st->target); - WARN_ON_ONCE(st->state < st->target); - } + st->should_run = cpuhp_next_state(bringup, &state, st, st->target); + if (!st->should_run) + goto end; } WARN_ON_ONCE(!cpuhp_is_ap_state(state)); @@ -721,6 +1113,7 @@ static void cpuhp_thread_fun(unsigned int cpu) st->should_run = false; } +end: cpuhp_lock_release(bringup); lockdep_release_cpus_lock(); @@ -793,7 +1186,7 @@ static int cpuhp_kick_ap_work(unsigned int cpu) cpuhp_lock_release(true); trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work); - ret = cpuhp_kick_ap(st, st->target); + ret = cpuhp_kick_ap(cpu, st, st->target); trace_cpuhp_exit(cpu, st->state, prev_state, ret); return ret; @@ -801,20 +1194,36 @@ static int cpuhp_kick_ap_work(unsigned int cpu) static struct smp_hotplug_thread cpuhp_threads = { .store = &cpuhp_state.thread, - .create = &cpuhp_create, .thread_should_run = cpuhp_should_run, .thread_fn = cpuhp_thread_fun, .thread_comm = "cpuhp/%u", .selfparking = true, }; +static __init void cpuhp_init_state(void) +{ + struct cpuhp_cpu_state *st; + int cpu; + + for_each_possible_cpu(cpu) { + st = per_cpu_ptr(&cpuhp_state, cpu); + init_completion(&st->done_up); + init_completion(&st->done_down); + } +} + void __init cpuhp_threads_init(void) { + cpuhp_init_state(); BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads)); kthread_unpark(this_cpu_read(cpuhp_state.thread)); } #ifdef CONFIG_HOTPLUG_CPU +#ifndef arch_clear_mm_cpumask_cpu +#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm)) +#endif + /** * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU * @cpu: a CPU id @@ -850,7 +1259,7 @@ void clear_tasks_mm_cpumask(int cpu) t = find_lock_task_mm(p); if (!t) continue; - cpumask_clear_cpu(cpu, mm_cpumask(t->mm)); + arch_clear_mm_cpumask_cpu(cpu, t->mm); task_unlock(t); } rcu_read_unlock(); @@ -862,7 +1271,6 @@ static int take_cpu_down(void *_param) struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE); int err, cpu = smp_processor_id(); - int ret; /* Ensure this CPU doesn't handle any more interrupts. */ err = __cpu_disable(); @@ -870,24 +1278,16 @@ static int take_cpu_down(void *_param) return err; /* - * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not - * do this step again. + * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going + * down, that the current state is CPUHP_TEARDOWN_CPU - 1. */ - WARN_ON(st->state != CPUHP_TEARDOWN_CPU); - st->state--; - /* Invoke the former CPU_DYING callbacks */ - for (; st->state > target; st->state--) { - ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); - /* - * DYING must not fail! - */ - WARN_ON_ONCE(ret); - } + WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1)); + + /* + * Invoke the former CPU_DYING callbacks. DYING must not fail! + */ + cpuhp_invoke_callback_range_nofail(false, cpu, st, target); - /* Give up timekeeping duties */ - tick_handover_do_timer(); - /* Remove CPU from timer broadcasting */ - tick_offline_cpu(cpu); /* Park the stopper thread */ stop_machine_park(cpu); return 0; @@ -899,7 +1299,7 @@ static int takedown_cpu(unsigned int cpu) int err; /* Park the smpboot threads */ - kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread); + kthread_park(st->thread); /* * Prevent irq alloc/free while the dying cpu reorganizes the @@ -915,7 +1315,7 @@ static int takedown_cpu(unsigned int cpu) /* CPU refused to die */ irq_unlock_sparse(); /* Unpark the hotplug thread so we can rollback there */ - kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread); + kthread_unpark(st->thread); return err; } BUG_ON(cpu_online(cpu)); @@ -937,8 +1337,17 @@ static int takedown_cpu(unsigned int cpu) /* This actually kills the CPU. */ __cpu_die(cpu); - tick_cleanup_dead_cpu(cpu); + cpuhp_bp_sync_dead(cpu); + + lockdep_cleanup_dead_cpu(cpu, idle_thread_get(cpu)); + + /* + * Callbacks must be re-integrated right away to the RCU state machine. + * Otherwise an RCU callback could block a further teardown function + * waiting for its completion. + */ rcutree_migrate_callbacks(cpu); + return 0; } @@ -954,37 +1363,36 @@ void cpuhp_report_idle_dead(void) struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); BUG_ON(st->state != CPUHP_AP_OFFLINE); - rcu_report_dead(smp_processor_id()); + tick_assert_timekeeping_handover(); + rcutree_report_cpu_dead(); st->state = CPUHP_AP_IDLE_DEAD; /* - * We cannot call complete after rcu_report_dead() so we delegate it + * We cannot call complete after rcutree_report_cpu_dead() so we delegate it * to an online cpu. */ smp_call_function_single(cpumask_first(cpu_online_mask), cpuhp_complete_idle_dead, st, 0); } -static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st) -{ - for (st->state++; st->state < st->target; st->state++) - cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); -} - static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { enum cpuhp_state prev_state = st->state; int ret = 0; - for (; st->state > target; st->state--) { - ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); - if (ret) { - st->target = prev_state; - if (st->state < prev_state) - undo_cpu_down(cpu, st); - break; - } + ret = cpuhp_invoke_callback_range(false, cpu, st, target); + if (ret) { + pr_debug("CPU DOWN failed (%d) CPU %u state %s (%d)\n", + ret, cpu, cpuhp_get_step(st->state)->name, + st->state); + + cpuhp_reset_state(cpu, st, prev_state); + + if (st->state < prev_state) + WARN_ON(cpuhp_invoke_callback_range(true, cpu, st, + prev_state)); } + return ret; } @@ -1005,7 +1413,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, cpuhp_tasks_frozen = tasks_frozen; - prev_state = cpuhp_set_state(st, target); + prev_state = cpuhp_set_state(cpu, st, target); /* * If the current CPU state is in the range of the AP hotplug thread, * then we need to kick the thread. @@ -1034,9 +1442,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, * to do the further cleanups. */ ret = cpuhp_down_callbacks(cpu, st, target); - if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) { - cpuhp_reset_state(st, prev_state); - __cpuhp_kick_ap(st); + if (ret && st->state < prev_state) { + if (st->state == CPUHP_TEARDOWN_CPU) { + cpuhp_reset_state(cpu, st, prev_state); + __cpuhp_kick_ap(st); + } else { + WARN(1, "DEAD callback error for CPU%d", cpu); + } } out: @@ -1050,11 +1462,42 @@ out: return ret; } +struct cpu_down_work { + unsigned int cpu; + enum cpuhp_state target; +}; + +static long __cpu_down_maps_locked(void *arg) +{ + struct cpu_down_work *work = arg; + + return _cpu_down(work->cpu, 0, work->target); +} + static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target) { + struct cpu_down_work work = { .cpu = cpu, .target = target, }; + + /* + * If the platform does not support hotplug, report it explicitly to + * differentiate it from a transient offlining failure. + */ + if (cpu_hotplug_offline_disabled) + return -EOPNOTSUPP; if (cpu_hotplug_disabled) return -EBUSY; - return _cpu_down(cpu, 0, target); + + /* + * Ensure that the control task does not run on the to be offlined + * CPU to prevent a deadlock against cfs_b->period_timer. + * Also keep at least one housekeeping cpu onlined to avoid generating + * an empty sched_domain span. + */ + for_each_cpu_and(cpu, cpu_online_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)) { + if (cpu != work.cpu) + return work_on_cpu(cpu, __cpu_down_maps_locked, &work); + } + return -EBUSY; } static int cpu_down(unsigned int cpu, enum cpuhp_state target) @@ -1074,6 +1517,8 @@ static int cpu_down(unsigned int cpu, enum cpuhp_state target) * This function is meant to be used by device core cpu subsystem only. * * Other subsystems should use remove_cpu() instead. + * + * Return: %0 on success or a negative errno code */ int cpu_device_down(struct device *dev) { @@ -1149,18 +1594,14 @@ void notify_cpu_starting(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE); - int ret; - rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */ + rcutree_report_cpu_starting(cpu); /* Enables RCU usage on this CPU. */ cpumask_set_cpu(cpu, &cpus_booted_once_mask); - while (st->state < target) { - st->state++; - ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); - /* - * STARTING must not fail! - */ - WARN_ON_ONCE(ret); - } + + /* + * STARTING must not fail! + */ + cpuhp_invoke_callback_range_nofail(true, cpu, st, target); } /* @@ -1176,8 +1617,10 @@ void cpuhp_online_idle(enum cpuhp_state state) if (state != CPUHP_AP_ONLINE_IDLE) return; + cpuhp_ap_update_sync_state(SYNC_STATE_ONLINE); + /* - * Unpart the stopper thread before we start the idle loop (and start + * Unpark the stopper thread before we start the idle loop (and start * scheduling); this ensures the stopper task is always available. */ stop_machine_unpark(smp_processor_id()); @@ -1214,11 +1657,17 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) ret = PTR_ERR(idle); goto out; } + + /* + * Reset stale stack state from the last time this CPU was online. + */ + scs_task_reset(idle); + kasan_unpoison_task_stack(idle); } cpuhp_tasks_frozen = tasks_frozen; - cpuhp_set_state(st, target); + cpuhp_set_state(cpu, st, target); /* * If the current CPU state is in the range of the AP hotplug thread, * then we need to kick the thread once more. @@ -1253,9 +1702,6 @@ static int cpu_up(unsigned int cpu, enum cpuhp_state target) if (!cpu_possible(cpu)) { pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n", cpu); -#if defined(CONFIG_IA64) - pr_err("please check additional_cpus= boot parameter\n"); -#endif return -EINVAL; } @@ -1269,7 +1715,7 @@ static int cpu_up(unsigned int cpu, enum cpuhp_state target) err = -EBUSY; goto out; } - if (!cpu_smt_allowed(cpu)) { + if (!cpu_bootable(cpu)) { err = -EPERM; goto out; } @@ -1287,6 +1733,8 @@ out: * This function is meant to be used by device core cpu subsystem only. * * Other subsystems should use add_cpu() instead. + * + * Return: %0 on success or a negative errno code */ int cpu_device_up(struct device *dev) { @@ -1312,6 +1760,8 @@ EXPORT_SYMBOL_GPL(add_cpu); * On some architectures like arm64, we can hibernate on any CPU, but on * wake up the CPU we hibernated on might be offline as a side effect of * using maxcpus= for example. + * + * Return: %0 on success or a negative errno code */ int bringup_hibernate_cpu(unsigned int sleep_cpu) { @@ -1328,18 +1778,125 @@ int bringup_hibernate_cpu(unsigned int sleep_cpu) return 0; } -void bringup_nonboot_cpus(unsigned int setup_max_cpus) +static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int ncpus, + enum cpuhp_state target) { unsigned int cpu; - for_each_present_cpu(cpu) { - if (num_online_cpus() >= setup_max_cpus) + for_each_cpu(cpu, mask) { + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); + + if (cpu_up(cpu, target) && can_rollback_cpu(st)) { + /* + * If this failed then cpu_up() might have only + * rolled back to CPUHP_BP_KICK_AP for the final + * online. Clean it up. NOOP if already rolled back. + */ + WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE)); + } + + if (!--ncpus) break; - if (!cpu_online(cpu)) - cpu_up(cpu, CPUHP_ONLINE); } } +#ifdef CONFIG_HOTPLUG_PARALLEL +static bool __cpuhp_parallel_bringup __ro_after_init = true; + +static int __init parallel_bringup_parse_param(char *arg) +{ + return kstrtobool(arg, &__cpuhp_parallel_bringup); +} +early_param("cpuhp.parallel", parallel_bringup_parse_param); + +#ifdef CONFIG_HOTPLUG_SMT +static inline bool cpuhp_smt_aware(void) +{ + return cpu_smt_max_threads > 1; +} + +static inline const struct cpumask *cpuhp_get_primary_thread_mask(void) +{ + return cpu_primary_thread_mask; +} +#else +static inline bool cpuhp_smt_aware(void) +{ + return false; +} +static inline const struct cpumask *cpuhp_get_primary_thread_mask(void) +{ + return cpu_none_mask; +} +#endif + +bool __weak arch_cpuhp_init_parallel_bringup(void) +{ + return true; +} + +/* + * On architectures which have enabled parallel bringup this invokes all BP + * prepare states for each of the to be onlined APs first. The last state + * sends the startup IPI to the APs. The APs proceed through the low level + * bringup code in parallel and then wait for the control CPU to release + * them one by one for the final onlining procedure. + * + * This avoids waiting for each AP to respond to the startup IPI in + * CPUHP_BRINGUP_CPU. + */ +static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus) +{ + const struct cpumask *mask = cpu_present_mask; + + if (__cpuhp_parallel_bringup) + __cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup(); + if (!__cpuhp_parallel_bringup) + return false; + + if (cpuhp_smt_aware()) { + const struct cpumask *pmask = cpuhp_get_primary_thread_mask(); + static struct cpumask tmp_mask __initdata; + + /* + * X86 requires to prevent that SMT siblings stopped while + * the primary thread does a microcode update for various + * reasons. Bring the primary threads up first. + */ + cpumask_and(&tmp_mask, mask, pmask); + cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_BP_KICK_AP); + cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_ONLINE); + /* Account for the online CPUs */ + ncpus -= num_online_cpus(); + if (!ncpus) + return true; + /* Create the mask for secondary CPUs */ + cpumask_andnot(&tmp_mask, mask, pmask); + mask = &tmp_mask; + } + + /* Bring the not-yet started CPUs up */ + cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP); + cpuhp_bringup_mask(mask, ncpus, CPUHP_ONLINE); + return true; +} +#else +static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; } +#endif /* CONFIG_HOTPLUG_PARALLEL */ + +void __init bringup_nonboot_cpus(unsigned int max_cpus) +{ + if (!max_cpus) + return; + + /* Try parallel bringup optimization if enabled */ + if (cpuhp_bringup_cpus_parallel(max_cpus)) + return; + + /* Full per CPU serialized bringup */ + cpuhp_bringup_mask(cpu_present_mask, max_cpus, CPUHP_ONLINE); +} + #ifdef CONFIG_PM_SLEEP_SMP static cpumask_var_t frozen_cpus; @@ -1350,8 +1907,8 @@ int freeze_secondary_cpus(int primary) cpu_maps_update_begin(); if (primary == -1) { primary = cpumask_first(cpu_online_mask); - if (!housekeeping_cpu(primary, HK_FLAG_TIMER)) - primary = housekeeping_any_cpu(HK_FLAG_TIMER); + if (!housekeeping_cpu(primary, HK_TYPE_TIMER)) + primary = housekeeping_any_cpu(HK_TYPE_TIMER); } else { if (!cpu_online(primary)) primary = cpumask_first(cpu_online_mask); @@ -1364,8 +1921,8 @@ int freeze_secondary_cpus(int primary) cpumask_clear(frozen_cpus); pr_info("Disabling non-boot CPUs ...\n"); - for_each_online_cpu(cpu) { - if (cpu == primary) + for (cpu = nr_cpu_ids - 1; cpu >= 0; cpu--) { + if (!cpu_online(cpu) || cpu == primary) continue; if (pm_wakeup_pending()) { @@ -1521,6 +2078,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = perf_event_init_cpu, .teardown.single = perf_event_exit_cpu, }, + [CPUHP_RANDOM_PREPARE] = { + .name = "random:prepare", + .startup.single = random_prepare_cpu, + .teardown.single = NULL, + }, [CPUHP_WORKQUEUE_PREP] = { .name = "workqueue:prepare", .startup.single = workqueue_prepare_cpu, @@ -1529,7 +2091,7 @@ static struct cpuhp_step cpuhp_hp_states[] = { [CPUHP_HRTIMERS_PREPARE] = { .name = "hrtimers:prepare", .startup.single = hrtimers_prepare_cpu, - .teardown.single = hrtimers_dead_cpu, + .teardown.single = NULL, }, [CPUHP_SMPCFD_PREPARE] = { .name = "smpcfd:prepare", @@ -1541,11 +2103,6 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = relay_prepare_cpu, .teardown.single = NULL, }, - [CPUHP_SLAB_PREPARE] = { - .name = "slab:prepare", - .startup.single = slab_prepare_cpu, - .teardown.single = slab_dead_cpu, - }, [CPUHP_RCUTREE_PREP] = { .name = "RCU/tree:prepare", .startup.single = rcutree_prepare_cpu, @@ -1561,13 +2118,38 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = timers_prepare_cpu, .teardown.single = timers_dead_cpu, }, - /* Kicks the plugged cpu into life */ + +#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP + /* + * Kicks the AP alive. AP will wait in cpuhp_ap_sync_alive() until + * the next step will release it. + */ + [CPUHP_BP_KICK_AP] = { + .name = "cpu:kick_ap", + .startup.single = cpuhp_kick_ap_alive, + }, + + /* + * Waits for the AP to reach cpuhp_ap_sync_alive() and then + * releases it for the complete bringup. + */ + [CPUHP_BRINGUP_CPU] = { + .name = "cpu:bringup", + .startup.single = cpuhp_bringup_ap, + .teardown.single = finish_cpu, + .cant_stop = true, + }, +#else + /* + * All-in-one CPU bringup state which includes the kick alive. + */ [CPUHP_BRINGUP_CPU] = { .name = "cpu:bringup", .startup.single = bringup_cpu, .teardown.single = finish_cpu, .cant_stop = true, }, +#endif /* Final state before CPU kills itself */ [CPUHP_AP_IDLE_DEAD] = { .name = "idle:dead", @@ -1596,13 +2178,23 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = NULL, .teardown.single = smpcfd_dying_cpu, }, + [CPUHP_AP_HRTIMERS_DYING] = { + .name = "hrtimers:dying", + .startup.single = hrtimers_cpu_starting, + .teardown.single = hrtimers_cpu_dying, + }, + [CPUHP_AP_TICK_DYING] = { + .name = "tick:dying", + .startup.single = NULL, + .teardown.single = tick_cpu_dying, + }, /* Entry state on starting. Interrupts enabled from here on. Transient * state for synchronsization */ [CPUHP_AP_ONLINE] = { .name = "ap:online", }, /* - * Handled on controll processor until the plugged processor manages + * Handled on control processor until the plugged processor manages * this itself. */ [CPUHP_TEARDOWN_CPU] = { @@ -1611,6 +2203,13 @@ static struct cpuhp_step cpuhp_hp_states[] = { .teardown.single = takedown_cpu, .cant_stop = true, }, + + [CPUHP_AP_SCHED_WAIT_EMPTY] = { + .name = "sched:waitempty", + .startup.single = NULL, + .teardown.single = sched_cpu_wait_empty, + }, + /* Handle smpboot threads park/unpark */ [CPUHP_AP_SMPBOOT_THREADS] = { .name = "smpboot/threads:online", @@ -1637,6 +2236,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = workqueue_online_cpu, .teardown.single = workqueue_offline_cpu, }, + [CPUHP_AP_RANDOM_ONLINE] = { + .name = "random:online", + .startup.single = random_online_cpu, + .teardown.single = NULL, + }, [CPUHP_AP_RCUTREE_ONLINE] = { .name = "RCU/tree:online", .startup.single = rcutree_online_cpu, @@ -1759,8 +2363,7 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup, * If there's nothing to do, we done. * Relies on the union for multi_instance. */ - if ((bringup && !sp->startup.single) || - (!bringup && !sp->teardown.single)) + if (cpuhp_step_empty(bringup, sp)) return 0; /* * The non AP bound callbacks can fail on bringup. On teardown @@ -1862,6 +2465,7 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance); /** * __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state * @state: The state to setup + * @name: Name of the step * @invoke: If true, the startup function is invoked for cpus where * cpu state >= @state * @startup: startup callback function @@ -1870,9 +2474,9 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance); * added afterwards. * * The caller needs to hold cpus read locked while calling this function. - * Returns: + * Return: * On success: - * Positive state number if @state is CPUHP_AP_ONLINE_DYN + * Positive state number if @state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN; * 0 for all other states * On failure: proper (negative) error code */ @@ -1895,7 +2499,7 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance); - dynstate = state == CPUHP_AP_ONLINE_DYN; + dynstate = state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN; if (ret > 0 && dynstate) { state = ret; ret = 0; @@ -1926,8 +2530,8 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, out: mutex_unlock(&cpuhp_state_mutex); /* - * If the requested state is CPUHP_AP_ONLINE_DYN, return the - * dynamically allocated state in case of success. + * If the requested state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN, + * return the dynamically allocated state in case of success. */ if (!ret && dynstate) return state; @@ -2072,6 +2676,12 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) for_each_online_cpu(cpu) { if (topology_is_primary_thread(cpu)) continue; + /* + * Disable can be called with CPU_SMT_ENABLED when changing + * from a higher to lower number of SMT threads per core. + */ + if (ctrlval == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu)) + continue; ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE); if (ret) break; @@ -2096,6 +2706,14 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) return ret; } +/* Check if the core a CPU belongs to is online */ +#if !defined(topology_is_core_online) +static inline bool topology_is_core_online(unsigned int cpu) +{ + return true; +} +#endif + int cpuhp_smt_enable(void) { int cpu, ret = 0; @@ -2106,6 +2724,8 @@ int cpuhp_smt_enable(void) /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) continue; + if (!cpu_smt_thread_allowed(cpu) || !topology_is_core_online(cpu)) + continue; ret = _cpu_up(cpu, 0, CPUHP_ONLINE); if (ret) break; @@ -2118,18 +2738,17 @@ int cpuhp_smt_enable(void) #endif #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU) -static ssize_t show_cpuhp_state(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t state_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); return sprintf(buf, "%d\n", st->state); } -static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL); +static DEVICE_ATTR_RO(state); -static ssize_t write_cpuhp_target(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t target_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); struct cpuhp_step *sp; @@ -2160,26 +2779,26 @@ static ssize_t write_cpuhp_target(struct device *dev, if (st->state < target) ret = cpu_up(dev->id, target); - else + else if (st->state > target) ret = cpu_down(dev->id, target); + else if (WARN_ON(st->target != target)) + st->target = target; out: unlock_device_hotplug(); return ret ? ret : count; } -static ssize_t show_cpuhp_target(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t target_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); return sprintf(buf, "%d\n", st->target); } -static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target); +static DEVICE_ATTR_RW(target); - -static ssize_t write_cpuhp_fail(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t fail_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); struct cpuhp_step *sp; @@ -2189,6 +2808,11 @@ static ssize_t write_cpuhp_fail(struct device *dev, if (ret) return ret; + if (fail == CPUHP_INVALID) { + st->fail = fail; + return count; + } + if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE) return -EINVAL; @@ -2199,6 +2823,15 @@ static ssize_t write_cpuhp_fail(struct device *dev, return -EINVAL; /* + * DEAD callbacks cannot fail... + * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter + * triggering STARTING callbacks, a failure in this state would + * hinder rollback. + */ + if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU) + return -EINVAL; + + /* * Cannot fail anything that doesn't have callbacks. */ mutex_lock(&cpuhp_state_mutex); @@ -2214,15 +2847,15 @@ static ssize_t write_cpuhp_fail(struct device *dev, return count; } -static ssize_t show_cpuhp_fail(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t fail_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); return sprintf(buf, "%d\n", st->fail); } -static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail); +static DEVICE_ATTR_RW(fail); static struct attribute *cpuhp_cpu_attrs[] = { &dev_attr_state.attr, @@ -2234,10 +2867,9 @@ static struct attribute *cpuhp_cpu_attrs[] = { static const struct attribute_group cpuhp_cpu_attr_group = { .attrs = cpuhp_cpu_attrs, .name = "hotplug", - NULL }; -static ssize_t show_cpuhp_states(struct device *dev, +static ssize_t states_show(struct device *dev, struct device_attribute *attr, char *buf) { ssize_t cur, res = 0; @@ -2256,7 +2888,7 @@ static ssize_t show_cpuhp_states(struct device *dev, mutex_unlock(&cpuhp_state_mutex); return res; } -static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL); +static DEVICE_ATTR_RO(states); static struct attribute *cpuhp_cpu_root_attrs[] = { &dev_attr_states.attr, @@ -2266,25 +2898,23 @@ static struct attribute *cpuhp_cpu_root_attrs[] = { static const struct attribute_group cpuhp_cpu_root_attr_group = { .attrs = cpuhp_cpu_root_attrs, .name = "hotplug", - NULL }; #ifdef CONFIG_HOTPLUG_SMT +static bool cpu_smt_num_threads_valid(unsigned int threads) +{ + if (IS_ENABLED(CONFIG_SMT_NUM_THREADS_DYNAMIC)) + return threads >= 1 && threads <= cpu_smt_max_threads; + return threads == 1 || threads == cpu_smt_max_threads; +} + static ssize_t __store_smt_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - int ctrlval, ret; - - if (sysfs_streq(buf, "on")) - ctrlval = CPU_SMT_ENABLED; - else if (sysfs_streq(buf, "off")) - ctrlval = CPU_SMT_DISABLED; - else if (sysfs_streq(buf, "forceoff")) - ctrlval = CPU_SMT_FORCE_DISABLED; - else - return -EINVAL; + int ctrlval, ret, num_threads, orig_threads; + bool force_off; if (cpu_smt_control == CPU_SMT_FORCE_DISABLED) return -EPERM; @@ -2292,21 +2922,39 @@ __store_smt_control(struct device *dev, struct device_attribute *attr, if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED) return -ENODEV; + if (sysfs_streq(buf, "on")) { + ctrlval = CPU_SMT_ENABLED; + num_threads = cpu_smt_max_threads; + } else if (sysfs_streq(buf, "off")) { + ctrlval = CPU_SMT_DISABLED; + num_threads = 1; + } else if (sysfs_streq(buf, "forceoff")) { + ctrlval = CPU_SMT_FORCE_DISABLED; + num_threads = 1; + } else if (kstrtoint(buf, 10, &num_threads) == 0) { + if (num_threads == 1) + ctrlval = CPU_SMT_DISABLED; + else if (cpu_smt_num_threads_valid(num_threads)) + ctrlval = CPU_SMT_ENABLED; + else + return -EINVAL; + } else { + return -EINVAL; + } + ret = lock_device_hotplug_sysfs(); if (ret) return ret; - if (ctrlval != cpu_smt_control) { - switch (ctrlval) { - case CPU_SMT_ENABLED: - ret = cpuhp_smt_enable(); - break; - case CPU_SMT_DISABLED: - case CPU_SMT_FORCE_DISABLED: - ret = cpuhp_smt_disable(ctrlval); - break; - } - } + orig_threads = cpu_smt_num_threads; + cpu_smt_num_threads = num_threads; + + force_off = ctrlval != cpu_smt_control && ctrlval == CPU_SMT_FORCE_DISABLED; + + if (num_threads > orig_threads) + ret = cpuhp_smt_enable(); + else if (num_threads < orig_threads || force_off) + ret = cpuhp_smt_disable(ctrlval); unlock_device_hotplug(); return ret ? ret : count; @@ -2329,28 +2977,38 @@ static const char *smt_states[] = { [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented", }; -static ssize_t -show_smt_control(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t control_show(struct device *dev, + struct device_attribute *attr, char *buf) { const char *state = smt_states[cpu_smt_control]; - return snprintf(buf, PAGE_SIZE - 2, "%s\n", state); +#ifdef CONFIG_HOTPLUG_SMT + /* + * If SMT is enabled but not all threads are enabled then show the + * number of threads. If all threads are enabled show "on". Otherwise + * show the state name. + */ + if (cpu_smt_control == CPU_SMT_ENABLED && + cpu_smt_num_threads != cpu_smt_max_threads) + return sysfs_emit(buf, "%d\n", cpu_smt_num_threads); +#endif + + return sysfs_emit(buf, "%s\n", state); } -static ssize_t -store_smt_control(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t control_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { return __store_smt_control(dev, attr, buf, count); } -static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control); +static DEVICE_ATTR_RW(control); -static ssize_t -show_smt_active(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t active_show(struct device *dev, + struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active()); + return sysfs_emit(buf, "%d\n", sched_smt_active()); } -static DEVICE_ATTR(active, 0444, show_smt_active, NULL); +static DEVICE_ATTR_RO(active); static struct attribute *cpuhp_smt_attrs[] = { &dev_attr_control.attr, @@ -2361,27 +3019,37 @@ static struct attribute *cpuhp_smt_attrs[] = { static const struct attribute_group cpuhp_smt_attr_group = { .attrs = cpuhp_smt_attrs, .name = "smt", - NULL }; static int __init cpu_smt_sysfs_init(void) { - return sysfs_create_group(&cpu_subsys.dev_root->kobj, - &cpuhp_smt_attr_group); + struct device *dev_root; + int ret = -ENODEV; + + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + ret = sysfs_create_group(&dev_root->kobj, &cpuhp_smt_attr_group); + put_device(dev_root); + } + return ret; } static int __init cpuhp_sysfs_init(void) { + struct device *dev_root; int cpu, ret; ret = cpu_smt_sysfs_init(); if (ret) return ret; - ret = sysfs_create_group(&cpu_subsys.dev_root->kobj, - &cpuhp_cpu_root_attr_group); - if (ret) - return ret; + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + ret = sysfs_create_group(&dev_root->kobj, &cpuhp_cpu_root_attr_group); + put_device(dev_root); + if (ret) + return ret; + } for_each_possible_cpu(cpu) { struct device *dev = get_cpu_device(cpu); @@ -2426,22 +3094,28 @@ const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL; EXPORT_SYMBOL(cpu_all_bits); #ifdef CONFIG_INIT_ALL_POSSIBLE -struct cpumask __cpu_possible_mask __read_mostly +struct cpumask __cpu_possible_mask __ro_after_init = {CPU_BITS_ALL}; #else -struct cpumask __cpu_possible_mask __read_mostly; +struct cpumask __cpu_possible_mask __ro_after_init; #endif EXPORT_SYMBOL(__cpu_possible_mask); struct cpumask __cpu_online_mask __read_mostly; EXPORT_SYMBOL(__cpu_online_mask); +struct cpumask __cpu_enabled_mask __read_mostly; +EXPORT_SYMBOL(__cpu_enabled_mask); + struct cpumask __cpu_present_mask __read_mostly; EXPORT_SYMBOL(__cpu_present_mask); struct cpumask __cpu_active_mask __read_mostly; EXPORT_SYMBOL(__cpu_active_mask); +struct cpumask __cpu_dying_mask __read_mostly; +EXPORT_SYMBOL(__cpu_dying_mask); + atomic_t __num_online_cpus __read_mostly; EXPORT_SYMBOL(__num_online_cpus); @@ -2455,11 +3129,6 @@ void init_cpu_possible(const struct cpumask *src) cpumask_copy(&__cpu_possible_mask, src); } -void init_cpu_online(const struct cpumask *src) -{ - cpumask_copy(&__cpu_online_mask, src); -} - void set_cpu_online(unsigned int cpu, bool online) { /* @@ -2506,10 +3175,13 @@ void __init boot_cpu_hotplug_init(void) { #ifdef CONFIG_SMP cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask); + atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), SYNC_STATE_ONLINE); #endif this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); + this_cpu_write(cpuhp_state.target, CPUHP_ONLINE); } +#ifdef CONFIG_CPU_MITIGATIONS /* * These are used for a global "mitigations=" cmdline option for toggling * optional CPU mitigations. @@ -2520,8 +3192,7 @@ enum cpu_mitigations { CPU_MITIGATIONS_AUTO_NOSMT, }; -static enum cpu_mitigations cpu_mitigations __ro_after_init = - CPU_MITIGATIONS_AUTO; +static enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; static int __init mitigations_parse_cmdline(char *arg) { @@ -2537,7 +3208,6 @@ static int __init mitigations_parse_cmdline(char *arg) return 0; } -early_param("mitigations", mitigations_parse_cmdline); /* mitigations=off */ bool cpu_mitigations_off(void) @@ -2552,3 +3222,11 @@ bool cpu_mitigations_auto_nosmt(void) return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; } EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt); +#else +static int __init mitigations_parse_cmdline(char *arg) +{ + pr_crit("Kernel compiled without mitigations, ignoring 'mitigations'; system may still be vulnerable\n"); + return 0; +} +#endif +early_param("mitigations", mitigations_parse_cmdline); |