From 5107e3ebb868d99872e1b64b054ccfcdad79d5d7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:06:58 +0200 Subject: x86/smpboot: Cleanup topology_phys_to_logical_pkg()/die() Make topology_phys_to_logical_pkg_die() static as it's only used in smpboot.c and fixup the kernel-doc warnings for both functions. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205255.493750666@linutronix.de --- arch/x86/include/asm/topology.h | 3 --- arch/x86/kernel/smpboot.c | 10 ++++++---- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 458c891a8273..2ba57588e937 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -139,7 +139,6 @@ static inline int topology_max_smt_threads(void) int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); -int topology_phys_to_logical_die(unsigned int die, unsigned int cpu); bool topology_is_primary_thread(unsigned int cpu); bool topology_smt_supported(void); #else @@ -149,8 +148,6 @@ topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } -static inline int topology_phys_to_logical_die(unsigned int die, - unsigned int cpu) { return 0; } static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 352f0ce1ece4..bca9692862f8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -288,6 +288,7 @@ bool topology_smt_supported(void) /** * topology_phys_to_logical_pkg - Map a physical package id to a logical + * @phys_pkg: The physical package id to map * * Returns logical package id or -1 if not found */ @@ -304,15 +305,17 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg) return -1; } EXPORT_SYMBOL(topology_phys_to_logical_pkg); + /** * topology_phys_to_logical_die - Map a physical die id to logical + * @die_id: The physical die id to map + * @cur_cpu: The CPU for which the mapping is done * * Returns logical die id or -1 if not found */ -int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu) +static int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu) { - int cpu; - int proc_id = cpu_data(cur_cpu).phys_proc_id; + int cpu, proc_id = cpu_data(cur_cpu).phys_proc_id; for_each_possible_cpu(cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -323,7 +326,6 @@ int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu) } return -1; } -EXPORT_SYMBOL(topology_phys_to_logical_die); /** * topology_update_package_map - Update the physical to logical package map -- cgit From ba831b7b1a517ba7f25d6fa9736a8092d07b0c74 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:00 +0200 Subject: cpu/hotplug: Mark arch_disable_smp_support() and bringup_nonboot_cpus() __init No point in keeping them around. 
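For orientation (a sketch, not part of the patch): __init places a function in the .init.text section, which the kernel frees once boot is complete, so functions only needed during bringup cost no memory afterwards and must never be called again. The function name below is hypothetical:

    #include <linux/init.h>

    /*
     * Lives in .init.text; the memory is discarded after boot, so this
     * must only run during the boot sequence.
     */
    static int __init example_bringup_setup(void)
    {
            return 0;
    }
    early_initcall(example_bringup_setup);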
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205255.551974164@linutronix.de --- arch/x86/kernel/smpboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bca9692862f8..8eb7721bae87 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1269,9 +1269,9 @@ unreg_nmi: } /** - * arch_disable_smp_support() - disables SMP support for x86 at runtime + * arch_disable_smp_support() - Disables SMP support for x86 at boottime */ -void arch_disable_smp_support(void) +void __init arch_disable_smp_support(void) { disable_ioapic_support(); } -- cgit From 134a12827bc59484c4d4a3ceabf178c831febbb8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:01 +0200 Subject: x86/smpboot: Avoid pointless delay calibration if TSC is synchronized When TSC is synchronized across sockets then there is no reason to calibrate the delay for the first CPU which comes up on a socket. Just reuse the existing calibration value. This removes 100ms pointlessly wasted time from CPU hotplug per socket. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205255.608773568@linutronix.de --- arch/x86/kernel/smpboot.c | 40 +++++++++++++++++++++++++--------------- arch/x86/kernel/tsc.c | 20 ++++++++++++++++---- 2 files changed, 41 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8eb7721bae87..0ad902ac0258 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -178,28 +178,17 @@ static void smp_callin(void) */ apic_ap_setup(); - /* - * Save our processor parameters. Note: this information - * is needed for clock calibration. - */ + /* Save our processor parameters. */ smp_store_cpu_info(cpuid); /* * The topology information must be up to date before - * calibrate_delay() and notify_cpu_starting(). + * notify_cpu_starting(). */ set_cpu_sibling_map(raw_smp_processor_id()); ap_init_aperfmperf(); - /* - * Get our bogomips. - * Update loops_per_jiffy in cpu_data. Previous call to - * smp_store_cpu_info() stored a value that is close but not as - * accurate as the value just calculated. - */ - calibrate_delay(); - cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; pr_debug("Stack at about %p\n", &cpuid); wmb(); @@ -212,8 +201,24 @@ static void smp_callin(void) cpumask_set_cpu(cpuid, cpu_callin_mask); } +static void ap_calibrate_delay(void) +{ + /* + * Calibrate the delay loop and update loops_per_jiffy in cpu_data. + * smp_store_cpu_info() stored a value that is close but not as + * accurate as the value just calculated. + * + * As this is invoked after the TSC synchronization check, + * calibrate_delay_is_known() will skip the calibration routine + * when TSC is synchronized across sockets. + */ + calibrate_delay(); + cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy; +} + static int cpu0_logical_apicid; static int enable_start_cpu0; + /* * Activate a secondary processor. 
*/ @@ -240,10 +245,15 @@ static void notrace start_secondary(void *unused) /* otherwise gcc will move up smp_processor_id before the cpu_init */ barrier(); + /* Check TSC synchronization with the control CPU: */ + check_tsc_sync_target(); + /* - * Check TSC synchronization with the boot CPU: + * Calibrate the delay loop after the TSC synchronization check. + * This allows to skip the calibration when TSC is synchronized + * across sockets. */ - check_tsc_sync_target(); + ap_calibrate_delay(); speculative_store_bypass_ht_init(); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 344698852146..1412b771651e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1598,10 +1598,7 @@ void __init tsc_init(void) #ifdef CONFIG_SMP /* - * If we have a constant TSC and are using the TSC for the delay loop, - * we can skip clock calibration if another cpu in the same socket has already - * been calibrated. This assumes that CONSTANT_TSC applies to all - * cpus in the socket - this should be a safe assumption. + * Check whether existing calibration data can be reused. */ unsigned long calibrate_delay_is_known(void) { @@ -1609,6 +1606,21 @@ unsigned long calibrate_delay_is_known(void) int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC); const struct cpumask *mask = topology_core_cpumask(cpu); + /* + * If TSC has constant frequency and TSC is synchronized across + * sockets then reuse CPU0 calibration. + */ + if (constant_tsc && !tsc_unstable) + return cpu_data(0).loops_per_jiffy; + + /* + * If TSC has constant frequency and TSC is not synchronized across + * sockets and this is not the first CPU in the socket, then reuse + * the calibration value of an already online CPU on that socket. + * + * This assumes that CONSTANT_TSC is consistent for all CPUs in a + * socket. + */ if (!constant_tsc || !mask) return 0; -- cgit From 666e1156b2c514f045827f50263ed2eb9d78671b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:03 +0200 Subject: x86/smpboot: Rename start_cpu0() to soft_restart_cpu() This is used in the SEV play_dead() implementation to re-online CPUs. But that has nothing to do with CPU0. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. 
Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205255.662319599@linutronix.de --- arch/x86/include/asm/cpu.h | 2 +- arch/x86/kernel/callthunks.c | 2 +- arch/x86/kernel/head_32.S | 10 +++++----- arch/x86/kernel/head_64.S | 10 +++++----- arch/x86/kernel/sev.c | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 78796b98a544..b356464b883b 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -30,7 +30,7 @@ struct x86_cpu { #ifdef CONFIG_HOTPLUG_CPU extern int arch_register_cpu(int num); extern void arch_unregister_cpu(int); -extern void start_cpu0(void); +extern void soft_restart_cpu(void); #ifdef CONFIG_DEBUG_HOTPLUG_CPU0 extern int _debug_hotplug_cpu(int cpu, int action); #endif diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 22ab13966427..f5a623641361 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -134,7 +134,7 @@ static bool skip_addr(void *dest) if (dest == ret_from_fork) return true; #ifdef CONFIG_HOTPLUG_CPU - if (dest == start_cpu0) + if (dest == soft_restart_cpu) return true; #endif #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 67c8ed99144b..805abf181f95 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -140,16 +140,16 @@ SYM_CODE_END(startup_32) #ifdef CONFIG_HOTPLUG_CPU /* - * Boot CPU0 entry point. It's called from play_dead(). Everything has been set - * up already except stack. We just set up stack here. Then call - * start_secondary(). + * Entry point for soft restart of a CPU. Invoked from xxx_play_dead() for + * restarting the boot CPU or for restarting SEV guest CPUs after CPU hot + * unplug. Everything is set up already except the stack. */ -SYM_FUNC_START(start_cpu0) +SYM_FUNC_START(soft_restart_cpu) movl initial_stack, %ecx movl %ecx, %esp call *(initial_code) 1: jmp 1b -SYM_FUNC_END(start_cpu0) +SYM_FUNC_END(soft_restart_cpu) #endif /* diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a5df3e994f04..fc86bc1b6ba4 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -377,11 +377,11 @@ SYM_CODE_END(secondary_startup_64) #ifdef CONFIG_HOTPLUG_CPU /* - * Boot CPU0 entry point. It's called from play_dead(). Everything has been set - * up already except stack. We just set up stack here. Then call - * start_secondary() via .Ljump_to_C_code. + * Entry point for soft restart of a CPU. Invoked from xxx_play_dead() for + * restarting the boot CPU or for restarting SEV guest CPUs after CPU hot + * unplug. Everything is set up already except the stack. */ -SYM_CODE_START(start_cpu0) +SYM_CODE_START(soft_restart_cpu) ANNOTATE_NOENDBR UNWIND_HINT_END_OF_STACK @@ -390,7 +390,7 @@ SYM_CODE_START(start_cpu0) movq TASK_threadsp(%rcx), %rsp jmp .Ljump_to_C_code -SYM_CODE_END(start_cpu0) +SYM_CODE_END(soft_restart_cpu) #endif #ifdef CONFIG_AMD_MEM_ENCRYPT diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index b031244d6d2d..d095a5d3c14a 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1328,7 +1328,7 @@ static void sev_es_play_dead(void) * If we get here, the VCPU was woken up again. Jump to CPU * startup code to get it back online. 
*/ - start_cpu0(); + soft_restart_cpu(); } #else /* CONFIG_HOTPLUG_CPU */ #define sev_es_play_dead native_play_dead -- cgit From e59e74dc48a309cb848ffc3d76a0d61aa6803c05 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:04 +0200 Subject: x86/topology: Remove CPU0 hotplug option This was introduced together with commit e1c467e69040 ("x86, hotplug: Wake up CPU0 via NMI instead of INIT, SIPI, SIPI") to eventually support physical hotplug of CPU0: "We'll change this code in the future to wake up hard offlined CPU0 if real platform and request are available." 11 years later this has not happened and physical hotplug is not officially supported. Remove the cruft. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205255.715707999@linutronix.de --- arch/x86/Kconfig | 43 -------------------- arch/x86/include/asm/cpu.h | 3 -- arch/x86/kernel/topology.c | 98 ++-------------------------------------------- arch/x86/power/cpu.c | 37 ----------------- 4 files changed, 4 insertions(+), 177 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 53bab123a8ee..d0191338a102 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2305,49 +2305,6 @@ config HOTPLUG_CPU def_bool y depends on SMP -config BOOTPARAM_HOTPLUG_CPU0 - bool "Set default setting of cpu0_hotpluggable" - depends on HOTPLUG_CPU - help - Set whether default state of cpu0_hotpluggable is on or off. - - Say Y here to enable CPU0 hotplug by default. If this switch - is turned on, there is no need to give cpu0_hotplug kernel - parameter and the CPU0 hotplug feature is enabled by default. - - Please note: there are two known CPU0 dependencies if you want - to enable the CPU0 hotplug feature either by this switch or by - cpu0_hotplug kernel parameter. - - First, resume from hibernate or suspend always starts from CPU0. - So hibernate and suspend are prevented if CPU0 is offline. - - Second dependency is PIC interrupts always go to CPU0. CPU0 can not - offline if any interrupt can not migrate out of CPU0. There may - be other CPU0 dependencies. - - Please make sure the dependencies are under your control before - you enable this feature. - - Say N if you don't want to enable CPU0 hotplug feature by default. - You still can enable the CPU0 hotplug feature at boot by kernel - parameter cpu0_hotplug. - -config DEBUG_HOTPLUG_CPU0 - def_bool n - prompt "Debug CPU0 hotplug" - depends on HOTPLUG_CPU - help - Enabling this option offlines CPU0 (if CPU0 can be offlined) as - soon as possible and boots up userspace with CPU0 offlined. User - can online CPU0 back after boot time. - - To debug CPU0 hotplug, you need to enable CPU0 offline/online - feature by either turning on CONFIG_BOOTPARAM_HOTPLUG_CPU0 during - compilation or giving cpu0_hotplug kernel parameter at boot. - - If unsure, say N. 
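For background (an illustrative sketch, not from this series): the hotpluggable flag handed to register_cpu() is what makes the driver core create the sysfs online control for a CPU, which is why the simplified arch_register_cpu() further down in this patch can express the whole policy as "every CPU except 0 is hotpluggable":

    #include <linux/cpu.h>

    /*
     * Sketch: when cpu->hotpluggable is set before registration, the
     * driver core exposes /sys/devices/system/cpu/cpuN/online.
     */
    static int sketch_register_cpu(struct cpu *c, int num)
    {
            c->hotpluggable = num > 0;      /* CPU0: no online control */
            return register_cpu(c, num);
    }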
- config COMPAT_VDSO def_bool n prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)" diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index b356464b883b..c854376e1cb9 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -31,9 +31,6 @@ struct x86_cpu { extern int arch_register_cpu(int num); extern void arch_unregister_cpu(int); extern void soft_restart_cpu(void); -#ifdef CONFIG_DEBUG_HOTPLUG_CPU0 -extern int _debug_hotplug_cpu(int cpu, int action); -#endif #endif extern void ap_init_aperfmperf(void); diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 1b83377274b8..ca004e2e4469 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -38,102 +38,12 @@ static DEFINE_PER_CPU(struct x86_cpu, cpu_devices); #ifdef CONFIG_HOTPLUG_CPU - -#ifdef CONFIG_BOOTPARAM_HOTPLUG_CPU0 -static int cpu0_hotpluggable = 1; -#else -static int cpu0_hotpluggable; -static int __init enable_cpu0_hotplug(char *str) -{ - cpu0_hotpluggable = 1; - return 1; -} - -__setup("cpu0_hotplug", enable_cpu0_hotplug); -#endif - -#ifdef CONFIG_DEBUG_HOTPLUG_CPU0 -/* - * This function offlines a CPU as early as possible and allows userspace to - * boot up without the CPU. The CPU can be onlined back by user after boot. - * - * This is only called for debugging CPU offline/online feature. - */ -int _debug_hotplug_cpu(int cpu, int action) +int arch_register_cpu(int cpu) { - int ret; - - if (!cpu_is_hotpluggable(cpu)) - return -EINVAL; + struct x86_cpu *xc = per_cpu_ptr(&cpu_devices, cpu); - switch (action) { - case 0: - ret = remove_cpu(cpu); - if (!ret) - pr_info("DEBUG_HOTPLUG_CPU0: CPU %u is now offline\n", cpu); - else - pr_debug("Can't offline CPU%d.\n", cpu); - break; - case 1: - ret = add_cpu(cpu); - if (ret) - pr_debug("Can't online CPU%d.\n", cpu); - - break; - default: - ret = -EINVAL; - } - - return ret; -} - -static int __init debug_hotplug_cpu(void) -{ - _debug_hotplug_cpu(0, 0); - return 0; -} - -late_initcall_sync(debug_hotplug_cpu); -#endif /* CONFIG_DEBUG_HOTPLUG_CPU0 */ - -int arch_register_cpu(int num) -{ - struct cpuinfo_x86 *c = &cpu_data(num); - - /* - * Currently CPU0 is only hotpluggable on Intel platforms. Other - * vendors can add hotplug support later. - * Xen PV guests don't support CPU0 hotplug at all. - */ - if (c->x86_vendor != X86_VENDOR_INTEL || - cpu_feature_enabled(X86_FEATURE_XENPV)) - cpu0_hotpluggable = 0; - - /* - * Two known BSP/CPU0 dependencies: Resume from suspend/hibernate - * depends on BSP. PIC interrupts depend on BSP. - * - * If the BSP dependencies are under control, one can tell kernel to - * enable BSP hotplug. This basically adds a control file and - * one can attempt to offline BSP. - */ - if (num == 0 && cpu0_hotpluggable) { - unsigned int irq; - /* - * We won't take down the boot processor on i386 if some - * interrupts only are able to be serviced by the BSP in PIC. 
- */ - for_each_active_irq(irq) { - if (!IO_APIC_IRQ(irq) && irq_has_action(irq)) { - cpu0_hotpluggable = 0; - break; - } - } - } - if (num || cpu0_hotpluggable) - per_cpu(cpu_devices, num).cpu.hotpluggable = 1; - - return register_cpu(&per_cpu(cpu_devices, num).cpu, num); + xc->cpu.hotpluggable = cpu > 0; + return register_cpu(&xc->cpu, cpu); } EXPORT_SYMBOL(arch_register_cpu); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 7a4d5e911415..63230ff8cf4f 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -351,43 +351,6 @@ static int bsp_pm_callback(struct notifier_block *nb, unsigned long action, case PM_HIBERNATION_PREPARE: ret = bsp_check(); break; -#ifdef CONFIG_DEBUG_HOTPLUG_CPU0 - case PM_RESTORE_PREPARE: - /* - * When system resumes from hibernation, online CPU0 because - * 1. it's required for resume and - * 2. the CPU was online before hibernation - */ - if (!cpu_online(0)) - _debug_hotplug_cpu(0, 1); - break; - case PM_POST_RESTORE: - /* - * When a resume really happens, this code won't be called. - * - * This code is called only when user space hibernation software - * prepares for snapshot device during boot time. So we just - * call _debug_hotplug_cpu() to restore to CPU0's state prior to - * preparing the snapshot device. - * - * This works for normal boot case in our CPU0 hotplug debug - * mode, i.e. CPU0 is offline and user mode hibernation - * software initializes during boot time. - * - * If CPU0 is online and user application accesses snapshot - * device after boot time, this will offline CPU0 and user may - * see different CPU0 state before and after accessing - * the snapshot device. But hopefully this is not a case when - * user debugging CPU0 hotplug. Even if users hit this case, - * they can easily online CPU0 back. - * - * To simplify this debug code, we only consider normal boot - * case. Otherwise we need to remember CPU0's state and restore - * to that state and resolve racy conditions etc. - */ - _debug_hotplug_cpu(0, 0); - break; -#endif default: break; } -- cgit From 5475abbde77f6d78a052a81e5d5de70e59f7181e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:06 +0200 Subject: x86/smpboot: Remove the CPU0 hotplug kludge This was introduced with commit e1c467e69040 ("x86, hotplug: Wake up CPU0 via NMI instead of INIT, SIPI, SIPI") to eventually support physical hotplug of CPU0: "We'll change this code in the future to wake up hard offlined CPU0 if real platform and request are available." 11 years later this has not happened and physical hotplug is not officially supported. Remove the cruft. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. 
Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205255.768845190@linutronix.de --- arch/x86/include/asm/apic.h | 1 - arch/x86/include/asm/smp.h | 1 - arch/x86/kernel/smpboot.c | 168 ++++---------------------------------------- 3 files changed, 13 insertions(+), 157 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3216da7074ba..dc50ed7db447 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -377,7 +377,6 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[]; * APIC functionality to boot other CPUs - only used on SMP: */ #ifdef CONFIG_SMP -extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); extern int lapic_can_unplug_cpu(void); #endif diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 4e91054c84be..0ec4c64a1981 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -130,7 +130,6 @@ void native_play_dead(void); void play_dead_common(void); void wbinvd_on_cpu(int cpu); int wbinvd_on_all_cpus(void); -void cond_wakeup_cpu0(void); void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0ad902ac0258..466e83a54572 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -216,9 +216,6 @@ static void ap_calibrate_delay(void) cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy; } -static int cpu0_logical_apicid; -static int enable_start_cpu0; - /* * Activate a secondary processor. */ @@ -241,8 +238,6 @@ static void notrace start_secondary(void *unused) x86_cpuinit.early_percpu_clock_init(); smp_callin(); - enable_start_cpu0 = 0; - /* otherwise gcc will move up smp_processor_id before the cpu_init */ barrier(); /* Check TSC synchronization with the control CPU: */ @@ -410,7 +405,7 @@ void smp_store_cpu_info(int id) c->cpu_index = id; /* * During boot time, CPU0 has this setup already. Save the info when - * bringing up AP or offlined CPU0. + * bringing up an AP. */ identify_secondary_cpu(c); c->initialized = true; @@ -807,51 +802,14 @@ static void __init smp_quirk_init_udelay(void) } /* - * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal - * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this - * won't ... remember to clear down the APIC, etc later. + * Wake up AP by INIT, INIT, STARTUP sequence. */ -int -wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip) -{ - u32 dm = apic->dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; - unsigned long send_status, accept_status = 0; - int maxlvt; - - /* Target chip */ - /* Boot on the stack */ - /* Kick the second */ - apic_icr_write(APIC_DM_NMI | dm, apicid); - - pr_debug("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(200); - if (APIC_INTEGRATED(boot_cpu_apic_version)) { - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. 
*/ - apic_write(APIC_ESR, 0); - accept_status = (apic_read(APIC_ESR) & 0xEF); - } - pr_debug("NMI sent\n"); - - if (send_status) - pr_err("APIC never delivered???\n"); - if (accept_status) - pr_err("APIC delivery error (%lx)\n", accept_status); - - return (send_status | accept_status); -} - -static int -wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) +static int wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) { unsigned long send_status = 0, accept_status = 0; int maxlvt, num_starts, j; + preempt_disable(); maxlvt = lapic_get_maxlvt(); /* @@ -957,6 +915,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) if (accept_status) pr_err("APIC delivery error (%lx)\n", accept_status); + preempt_enable(); return (send_status | accept_status); } @@ -997,67 +956,6 @@ static void announce_cpu(int cpu, int apicid) node, cpu, apicid); } -static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs) -{ - int cpu; - - cpu = smp_processor_id(); - if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0) - return NMI_HANDLED; - - return NMI_DONE; -} - -/* - * Wake up AP by INIT, INIT, STARTUP sequence. - * - * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS - * boot-strap code which is not a desired behavior for waking up BSP. To - * void the boot-strap code, wake up CPU0 by NMI instead. - * - * This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined - * (i.e. physically hot removed and then hot added), NMI won't wake it up. - * We'll change this code in the future to wake up hard offlined CPU0 if - * real platform and request are available. - */ -static int -wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid, - int *cpu0_nmi_registered) -{ - int id; - int boot_error; - - preempt_disable(); - - /* - * Wake up AP by INIT, INIT, STARTUP sequence. - */ - if (cpu) { - boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); - goto out; - } - - /* - * Wake up BSP by nmi. - * - * Register a NMI handler to help wake up CPU0. - */ - boot_error = register_nmi_handler(NMI_LOCAL, - wakeup_cpu0_nmi, 0, "wake_cpu0"); - - if (!boot_error) { - enable_start_cpu0 = 1; - *cpu0_nmi_registered = 1; - id = apic->dest_mode_logical ? cpu0_logical_apicid : apicid; - boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip); - } - -out: - preempt_enable(); - - return boot_error; -} - int common_cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; @@ -1086,8 +984,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) * Returns zero if CPU booted OK, else error code from * ->wakeup_secondary_cpu. */ -static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, - int *cpu0_nmi_registered) +static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) { /* start_ip had better be page-aligned! */ unsigned long start_ip = real_mode_header->trampoline_start; @@ -1120,7 +1017,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, * This grunge runs the startup process for * the targeted processor. */ - if (x86_platform.legacy.warm_reset) { pr_debug("Setting warm reset code and vector.\n"); @@ -1149,15 +1045,14 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, * - Use a method from the APIC driver if one defined, with wakeup * straight to 64-bit mode preferred over wakeup to RM. * Otherwise, - * - Use an INIT boot APIC message for APs or NMI for BSP. 
+ * - Use an INIT boot APIC message */ if (apic->wakeup_secondary_cpu_64) boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip); else if (apic->wakeup_secondary_cpu) boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); else - boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid, - cpu0_nmi_registered); + boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); if (!boot_error) { /* @@ -1206,9 +1101,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, int native_cpu_up(unsigned int cpu, struct task_struct *tidle) { int apicid = apic->cpu_present_to_apicid(cpu); - int cpu0_nmi_registered = 0; unsigned long flags; - int err, ret = 0; + int err; lockdep_assert_irqs_enabled(); @@ -1247,11 +1141,10 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) if (err) return err; - err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered); + err = do_boot_cpu(apicid, cpu, tidle); if (err) { pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); - ret = -EIO; - goto unreg_nmi; + return err; } /* @@ -1267,15 +1160,7 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) touch_nmi_watchdog(); } -unreg_nmi: - /* - * Clean up the nmi handler. Do this after the callin and callout sync - * to avoid impact of possible long unregister time. - */ - if (cpu0_nmi_registered) - unregister_nmi_handler(NMI_LOCAL, "wake_cpu0"); - - return ret; + return 0; } /** @@ -1373,14 +1258,6 @@ static void __init smp_cpu_index_default(void) } } -static void __init smp_get_logical_apicid(void) -{ - if (x2apic_mode) - cpu0_logical_apicid = apic_read(APIC_LDR); - else - cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); -} - void __init smp_prepare_cpus_common(void) { unsigned int i; @@ -1443,8 +1320,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) /* Setup local timer */ x86_init.timers.setup_percpu_clockev(); - smp_get_logical_apicid(); - pr_info("CPU0: "); print_cpu_info(&cpu_data(0)); @@ -1752,18 +1627,6 @@ void play_dead_common(void) local_irq_disable(); } -/** - * cond_wakeup_cpu0 - Wake up CPU0 if needed. - * - * If NMI wants to wake up CPU0, start CPU0. - */ -void cond_wakeup_cpu0(void) -{ - if (smp_processor_id() == 0 && enable_start_cpu0) - start_cpu0(); -} -EXPORT_SYMBOL_GPL(cond_wakeup_cpu0); - /* * We need to flush the caches before going to sleep, lest we have * dirty data in our caches when we come back up. @@ -1831,8 +1694,6 @@ static inline void mwait_play_dead(void) __monitor(mwait_ptr, 0, 0); mb(); __mwait(eax, 0); - - cond_wakeup_cpu0(); } } @@ -1841,11 +1702,8 @@ void __noreturn hlt_play_dead(void) if (__this_cpu_read(cpu_info.x86) >= 4) wbinvd(); - while (1) { + while (1) native_halt(); - - cond_wakeup_cpu0(); - } } void native_play_dead(void) -- cgit From cded367976587ed4d160ed7d6bb118992a8b82ab Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:08 +0200 Subject: x86/smpboot: Restrict soft_restart_cpu() to SEV Now that the CPU0 hotplug cruft is gone, the only user is AMD SEV. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. 
Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205255.822234014@linutronix.de --- arch/x86/kernel/callthunks.c | 2 +- arch/x86/kernel/head_32.S | 14 -------------- arch/x86/kernel/head_64.S | 2 +- 3 files changed, 2 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index f5a623641361..8bb937331acb 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -133,7 +133,7 @@ static bool skip_addr(void *dest) /* Accounts directly */ if (dest == ret_from_fork) return true; -#ifdef CONFIG_HOTPLUG_CPU +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT) if (dest == soft_restart_cpu) return true; #endif diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 805abf181f95..c9318993f959 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -138,20 +138,6 @@ SYM_CODE_START(startup_32) jmp .Ldefault_entry SYM_CODE_END(startup_32) -#ifdef CONFIG_HOTPLUG_CPU -/* - * Entry point for soft restart of a CPU. Invoked from xxx_play_dead() for - * restarting the boot CPU or for restarting SEV guest CPUs after CPU hot - * unplug. Everything is set up already except the stack. - */ -SYM_FUNC_START(soft_restart_cpu) - movl initial_stack, %ecx - movl %ecx, %esp - call *(initial_code) -1: jmp 1b -SYM_FUNC_END(soft_restart_cpu) -#endif - /* * Non-boot CPU entry point; entered from trampoline.S * We can't lgdt here, because lgdt itself uses a data segment, but diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index fc86bc1b6ba4..8458033bb9f1 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -375,7 +375,7 @@ SYM_CODE_END(secondary_startup_64) #include "verify_cpu.S" #include "sev_verify_cbit.S" -#ifdef CONFIG_HOTPLUG_CPU +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT) /* * Entry point for soft restart of a CPU. Invoked from xxx_play_dead() for * restarting the boot CPU or for restarting SEV guest CPUs after CPU hot -- cgit From c7f15dd3f0e9f1d12d1ae21f0bbd61302ef3abcf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:09 +0200 Subject: x86/smpboot: Remove unnecessary barrier() Peter stumbled over the barrier() after the invocation of smp_callin() in start_secondary(): "...this barrier() and it's comment seem weird vs smp_callin(). That function ends with an atomic bitop (it has to, at the very least it must not be weaker than store-release) but also has an explicit wmb() to order setup vs CPU_STARTING. There is no way the smp_processor_id() referred to in this comment can land before cpu_init() even without the barrier()." The barrier() along with the comment was added in 2003 with commit d8f19f2cac70 ("[PATCH] x86-64 merge") in the history tree. One of those well documented combo patches of that time which changes world and some more. The context back then was: /* * Dont put anything before smp_callin(), SMP * booting is too fragile that we want to limit the * things done here to the most necessary things. */ cpu_init(); smp_callin(); + /* otherwise gcc will move up smp_processor_id before the cpu_init */ + barrier(); Dprintk("cpu %d: waiting for commence\n", smp_processor_id()); Even back in 2003 the compiler was not allowed to reorder that smp_processor_id() invocation before the cpu_init() function call. 
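For reference, barrier() is a pure compiler barrier along these lines (essentially what the kernel defines; it emits no machine code, the "memory" clobber merely forbids the compiler from caching or reordering memory accesses across it):

    #define barrier()       __asm__ __volatile__("" : : : "memory")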
Especially not as smp_processor_id() resolved to: asm volatile("movl %%gs:%c1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); There is no trace of this change in any mailing list archive including the back then official x86_64 list discuss@x86-64.org, which would explain the problem this change solved. The debug prints are gone by now and the the only smp_processor_id() invocation today is farther down in start_secondary() after locking vector_lock which itself prevents reordering. Even if the compiler would be allowed to reorder this, the code would still be correct as GSBASE is set up early in the assembly code and is valid when the CPU reaches start_secondary(), while the code at the time when this barrier was added did the GSBASE setup in cpu_init(). As the barrier has zero value, remove it. Reported-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205255.875713771@linutronix.de --- arch/x86/kernel/smpboot.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 466e83a54572..f5f43284b4a0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -238,8 +238,6 @@ static void notrace start_secondary(void *unused) x86_cpuinit.early_percpu_clock_init(); smp_callin(); - /* otherwise gcc will move up smp_processor_id before the cpu_init */ - barrier(); /* Check TSC synchronization with the control CPU: */ check_tsc_sync_target(); -- cgit From 2b3be65d2e4f4bb9358b039b99ad1a391dd3c311 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 12 May 2023 23:07:11 +0200 Subject: x86/smpboot: Split up native_cpu_up() into separate phases and document them There are four logical parts to what native_cpu_up() does on the BSP (or on the controlling CPU for a later hotplug): 1) Wake the AP by sending the INIT/SIPI/SIPI sequence. 2) Wait for the AP to make it as far as wait_for_master_cpu() which sets that CPU's bit in cpu_initialized_mask, then sets the bit in cpu_callout_mask to let the AP proceed through cpu_init(). 3) Wait for the AP to finish cpu_init() and get as far as the smp_callin() call, which sets that CPU's bit in cpu_callin_mask. 4) Perform the TSC synchronization and wait for the AP to actually mark itself online in cpu_online_mask. In preparation to allow these phases to operate in parallel on multiple APs, split them out into separate functions and document the interactions a little more clearly in both the BP and AP code paths. No functional change intended. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205255.928917242@linutronix.de --- arch/x86/kernel/smpboot.c | 184 ++++++++++++++++++++++++++++++---------------- 1 file changed, 119 insertions(+), 65 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f5f43284b4a0..0bd9c9f7861f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -193,6 +193,10 @@ static void smp_callin(void) wmb(); + /* + * This runs the AP through all the cpuhp states to its target + * state CPUHP_ONLINE. 
+ */ notify_cpu_starting(cpuid); /* @@ -233,12 +237,28 @@ static void notrace start_secondary(void *unused) load_cr3(swapper_pg_dir); __flush_tlb_all(); #endif + /* + * Sync point with wait_cpu_initialized(). Before proceeding through + * cpu_init(), the AP will call wait_for_master_cpu() which sets its + * own bit in cpu_initialized_mask and then waits for the BSP to set + * its bit in cpu_callout_mask to release it. + */ cpu_init_secondary(); rcu_cpu_starting(raw_smp_processor_id()); x86_cpuinit.early_percpu_clock_init(); + + /* + * Sync point with wait_cpu_callin(). The AP doesn't wait here + * but just sets the bit to let the controlling CPU (BSP) know that + * it's got this far. + */ smp_callin(); - /* Check TSC synchronization with the control CPU: */ + /* + * Check TSC synchronization with the control CPU, which will do + * its part of this from wait_cpu_online(), making it an implicit + * synchronization point. + */ check_tsc_sync_target(); /* @@ -257,6 +277,7 @@ static void notrace start_secondary(void *unused) * half valid vector space. */ lock_vector_lock(); + /* Sync point with do_wait_cpu_online() */ set_cpu_online(smp_processor_id(), true); lapic_online(); unlock_vector_lock(); @@ -979,17 +1000,13 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. - * Returns zero if CPU booted OK, else error code from + * Returns zero if startup was successfully sent, else error code from * ->wakeup_secondary_cpu. */ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) { - /* start_ip had better be page-aligned! */ unsigned long start_ip = real_mode_header->trampoline_start; - unsigned long boot_error = 0; - unsigned long timeout; - #ifdef CONFIG_X86_64 /* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */ if (apic->wakeup_secondary_cpu_64) @@ -1046,60 +1063,89 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) * - Use an INIT boot APIC message */ if (apic->wakeup_secondary_cpu_64) - boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip); + return apic->wakeup_secondary_cpu_64(apicid, start_ip); else if (apic->wakeup_secondary_cpu) - boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); - else - boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); + return apic->wakeup_secondary_cpu(apicid, start_ip); - if (!boot_error) { - /* - * Wait 10s total for first sign of life from AP - */ - boot_error = -1; - timeout = jiffies + 10*HZ; - while (time_before(jiffies, timeout)) { - if (cpumask_test_cpu(cpu, cpu_initialized_mask)) { - /* - * Tell AP to proceed with initialization - */ - cpumask_set_cpu(cpu, cpu_callout_mask); - boot_error = 0; - break; - } - schedule(); - } - } + return wakeup_secondary_cpu_via_init(apicid, start_ip); +} - if (!boot_error) { - /* - * Wait till AP completes initial initialization - */ - while (!cpumask_test_cpu(cpu, cpu_callin_mask)) { - /* - * Allow other tasks to run while we wait for the - * AP to come online. This also gives a chance - * for the MTRR work(triggered by the AP coming online) - * to be completed in the stop machine context. - */ - schedule(); - } - } +static int wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask) +{ + unsigned long timeout; - if (x86_platform.legacy.warm_reset) { - /* - * Cleanup possible dangling ends... - */ - smpboot_restore_warm_reset_vector(); + /* + * Wait up to 10s for the CPU to report in. 
+ */ + timeout = jiffies + 10*HZ; + while (time_before(jiffies, timeout)) { + if (cpumask_test_cpu(cpu, mask)) + return 0; + + schedule(); } + return -1; +} - return boot_error; +/* + * Bringup step two: Wait for the target AP to reach cpu_init_secondary() + * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it + * to proceed. The AP will then proceed past setting its 'callin' bit + * and end up waiting in check_tsc_sync_target() until we reach + * do_wait_cpu_online() to tend to it. + */ +static int wait_cpu_initialized(unsigned int cpu) +{ + /* + * Wait for first sign of life from AP. + */ + if (wait_cpu_cpumask(cpu, cpu_initialized_mask)) + return -1; + + cpumask_set_cpu(cpu, cpu_callout_mask); + return 0; } -int native_cpu_up(unsigned int cpu, struct task_struct *tidle) +/* + * Bringup step three: Wait for the target AP to reach smp_callin(). + * The AP is not waiting for us here so we don't need to parallelise + * this step. Not entirely clear why we care about this, since we just + * proceed directly to TSC synchronization which is the next sync + * point with the AP anyway. + */ +static void wait_cpu_callin(unsigned int cpu) +{ + while (!cpumask_test_cpu(cpu, cpu_callin_mask)) + schedule(); +} + +/* + * Bringup step four: Synchronize the TSC and wait for the target AP + * to reach set_cpu_online() in start_secondary(). + */ +static void wait_cpu_online(unsigned int cpu) { - int apicid = apic->cpu_present_to_apicid(cpu); unsigned long flags; + + /* + * Check TSC synchronization with the AP (keep irqs disabled + * while doing so): + */ + local_irq_save(flags); + check_tsc_sync_source(cpu); + local_irq_restore(flags); + + /* + * Wait for the AP to mark itself online, so the core caller + * can drop sparse_irq_lock. + */ + while (!cpu_online(cpu)) + schedule(); +} + +static int native_kick_ap(unsigned int cpu, struct task_struct *tidle) +{ + int apicid = apic->cpu_present_to_apicid(cpu); int err; lockdep_assert_irqs_enabled(); @@ -1140,25 +1186,33 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) return err; err = do_boot_cpu(apicid, cpu, tidle); - if (err) { + if (err) pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); - return err; - } - /* - * Check TSC synchronization with the AP (keep irqs disabled - * while doing so): - */ - local_irq_save(flags); - check_tsc_sync_source(cpu); - local_irq_restore(flags); + return err; +} - while (!cpu_online(cpu)) { - cpu_relax(); - touch_nmi_watchdog(); - } +int native_cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + int ret; - return 0; + ret = native_kick_ap(cpu, tidle); + if (ret) + goto out; + + ret = wait_cpu_initialized(cpu); + if (ret) + goto out; + + wait_cpu_callin(cpu); + wait_cpu_online(cpu); + +out: + /* Cleanup possible dangling ends... */ + if (x86_platform.legacy.warm_reset) + smpboot_restore_warm_reset_vector(); + + return ret; } /** -- cgit From e94cd1503b153ea753f0c4ed9d5ed12e7abd1306 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:12 +0200 Subject: x86/smpboot: Get rid of cpu_init_secondary() The synchronization of the AP with the control CPU is a SMP boot problem and has nothing to do with cpu_init(). Open code cpu_init_secondary() in start_secondary() and move wait_for_master_cpu() into the SMP boot code. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. 
Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205255.981999763@linutronix.de --- arch/x86/include/asm/processor.h | 1 - arch/x86/kernel/cpu/common.c | 27 --------------------------- arch/x86/kernel/smpboot.c | 24 +++++++++++++++++++----- 3 files changed, 19 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a1e4fa58b357..d46300e94f85 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -551,7 +551,6 @@ extern void switch_gdt_and_percpu_base(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); extern void cpu_init(void); -extern void cpu_init_secondary(void); extern void cpu_init_exception_handling(void); extern void cr4_init(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80710a68ef7d..e25fb13cd6ef 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2123,19 +2123,6 @@ static void dbg_restore_debug_regs(void) #define dbg_restore_debug_regs() #endif /* ! CONFIG_KGDB */ -static void wait_for_master_cpu(int cpu) -{ -#ifdef CONFIG_SMP - /* - * wait for ACK from master CPU before continuing - * with AP initialization - */ - WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)); - while (!cpumask_test_cpu(cpu, cpu_callout_mask)) - cpu_relax(); -#endif -} - static inline void setup_getcpu(int cpu) { unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu)); @@ -2239,8 +2226,6 @@ void cpu_init(void) struct task_struct *cur = current; int cpu = raw_smp_processor_id(); - wait_for_master_cpu(cpu); - ucode_cpu_init(cpu); #ifdef CONFIG_NUMA @@ -2293,18 +2278,6 @@ void cpu_init(void) load_fixmap_gdt(cpu); } -#ifdef CONFIG_SMP -void cpu_init_secondary(void) -{ - /* - * Relies on the BP having set-up the IDT tables, which are loaded - * on this CPU in cpu_init_exception_handling(). - */ - cpu_init_exception_handling(); - cpu_init(); -} -#endif - #ifdef CONFIG_MICROCODE_LATE_LOADING /** * store_cpu_caps() - Store a snapshot of CPU capabilities diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0bd9c9f7861f..50eff9b7fe7f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -220,6 +220,17 @@ static void ap_calibrate_delay(void) cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy; } +static void wait_for_master_cpu(int cpu) +{ + /* + * Wait for release by control CPU before continuing with AP + * initialization. + */ + WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)); + while (!cpumask_test_cpu(cpu, cpu_callout_mask)) + cpu_relax(); +} + /* * Activate a secondary processor. */ @@ -237,13 +248,16 @@ static void notrace start_secondary(void *unused) load_cr3(swapper_pg_dir); __flush_tlb_all(); #endif + cpu_init_exception_handling(); + /* - * Sync point with wait_cpu_initialized(). Before proceeding through - * cpu_init(), the AP will call wait_for_master_cpu() which sets its - * own bit in cpu_initialized_mask and then waits for the BSP to set - * its bit in cpu_callout_mask to release it. + * Sync point with wait_cpu_initialized(). Sets AP in + * cpu_initialized_mask and then waits for the control CPU + * to release it. 
*/ - cpu_init_secondary(); + wait_for_master_cpu(raw_smp_processor_id()); + + cpu_init(); rcu_cpu_starting(raw_smp_processor_id()); x86_cpuinit.early_percpu_clock_init(); -- cgit From a32226fa3b7d33d380494cf94cf1d4f8ebb70004 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:14 +0200 Subject: x86/cpu/cacheinfo: Remove cpu_callout_mask dependency cpu_callout_mask is used for the stop machine based MTRR/PAT init. In preparation of moving the BP/AP synchronization to the core hotplug code, use a private CPU mask for cacheinfo and manage it in the starting/dying hotplug state. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.035041005@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 4063e8991211..8f86eacf69f7 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -39,6 +39,8 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); /* Shared L2 cache maps */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); +static cpumask_var_t cpu_cacheinfo_mask; + /* Kernel controls MTRR and/or PAT MSRs. */ unsigned int memory_caching_control __ro_after_init; @@ -1172,8 +1174,10 @@ void cache_bp_restore(void) cache_cpu_init(); } -static int cache_ap_init(unsigned int cpu) +static int cache_ap_online(unsigned int cpu) { + cpumask_set_cpu(cpu, cpu_cacheinfo_mask); + if (!memory_caching_control || get_cache_aps_delayed_init()) return 0; @@ -1191,11 +1195,17 @@ static int cache_ap_init(unsigned int cpu) * lock to prevent MTRR entry changes */ stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL, - cpu_callout_mask); + cpu_cacheinfo_mask); return 0; } +static int cache_ap_offline(unsigned int cpu) +{ + cpumask_clear_cpu(cpu, cpu_cacheinfo_mask); + return 0; +} + /* * Delayed cache initialization for all AP's */ @@ -1210,9 +1220,12 @@ void cache_aps_init(void) static int __init cache_ap_register(void) { + zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL); + cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask); + cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING, "x86/cachectrl:starting", - cache_ap_init, NULL); + cache_ap_online, cache_ap_offline); return 0; } -core_initcall(cache_ap_register); +early_initcall(cache_ap_register); -- cgit From d4f28f07c2fe736bce8ca415a86fb74c629200f3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:16 +0200 Subject: x86/smpboot: Move synchronization masks to SMP boot code The usage is in smpboot.c and not in the CPU initialization code. The XEN_PV usage of cpu_callout_mask is obsolete as cpu_init() not longer waits and cacheinfo has its own CPU mask now, so cpu_callout_mask can be made static too. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. 
Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.091511483@linutronix.de --- arch/x86/include/asm/cpumask.h | 5 ----- arch/x86/kernel/cpu/common.c | 17 ----------------- arch/x86/kernel/smpboot.c | 16 ++++++++++++++++ arch/x86/xen/smp_pv.c | 3 --- 4 files changed, 16 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index c5aed9e9226c..4acfd57de8f1 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -4,11 +4,6 @@ #ifndef __ASSEMBLY__ #include -extern cpumask_var_t cpu_callin_mask; -extern cpumask_var_t cpu_callout_mask; -extern cpumask_var_t cpu_initialized_mask; -extern cpumask_var_t cpu_sibling_setup_mask; - extern void setup_cpu_local_masks(void); /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e25fb13cd6ef..640fd1802c72 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -67,14 +67,6 @@ u32 elf_hwcap2 __read_mostly; -/* all of these masks are initialized in setup_cpu_local_masks() */ -cpumask_var_t cpu_initialized_mask; -cpumask_var_t cpu_callout_mask; -cpumask_var_t cpu_callin_mask; - -/* representing cpus for which sibling maps can be computed */ -cpumask_var_t cpu_sibling_setup_mask; - /* Number of siblings per CPU package */ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); @@ -169,15 +161,6 @@ clear_ppin: clear_cpu_cap(c, info->feature); } -/* correctly size the local cpu masks */ -void __init setup_cpu_local_masks(void) -{ - alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callin_mask); - alloc_bootmem_cpumask_var(&cpu_callout_mask); - alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); -} - static void default_init(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 50eff9b7fe7f..8b07c6e6d620 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -101,6 +101,13 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map); DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +/* All of these masks are initialized in setup_cpu_local_masks() */ +static cpumask_var_t cpu_initialized_mask; +static cpumask_var_t cpu_callout_mask; +static cpumask_var_t cpu_callin_mask; +/* Representing CPUs for which sibling maps can be computed */ +static cpumask_var_t cpu_sibling_setup_mask; + /* Logical package management. 
We might want to allocate that dynamically */ unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); @@ -1545,6 +1552,15 @@ __init void prefill_possible_map(void) set_cpu_possible(i, true); } +/* correctly size the local cpu masks */ +void __init setup_cpu_local_masks(void) +{ + alloc_bootmem_cpumask_var(&cpu_initialized_mask); + alloc_bootmem_cpumask_var(&cpu_callin_mask); + alloc_bootmem_cpumask_var(&cpu_callout_mask); + alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); +} + #ifdef CONFIG_HOTPLUG_CPU /* Recompute SMT state for all CPUs on offline */ diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index a9cf8c8fa074..a6f9128a4091 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -254,15 +254,12 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) struct desc_struct *gdt; unsigned long gdt_mfn; - /* used to tell cpu_init() that it can proceed with initialization */ - cpumask_set_cpu(cpu, cpu_callout_mask); if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) return 0; ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (ctxt == NULL) { cpumask_clear_cpu(cpu, xen_cpu_initialized_map); - cpumask_clear_cpu(cpu, cpu_callout_mask); return -ENOMEM; } -- cgit From 9d349d47f0e39b4d1b68793ded2459daa1f948f0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:17 +0200 Subject: x86/smpboot: Make TSC synchronization function call based Spin-waiting on the control CPU until the AP reaches the TSC synchronization is just a waste especially in the case that there is no synchronization required. As the synchronization has to run with interrupts disabled the control CPU part can just be done from a SMP function call. The upcoming AP issues that call async only in the case that synchronization is required. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.148255496@linutronix.de --- arch/x86/include/asm/tsc.h | 2 -- arch/x86/kernel/smpboot.c | 20 +++----------------- arch/x86/kernel/tsc_sync.c | 36 +++++++++++------------------------- 3 files changed, 14 insertions(+), 44 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index fbdc3d951494..dc1b03be43eb 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -55,12 +55,10 @@ extern bool tsc_async_resets; #ifdef CONFIG_X86_TSC extern bool tsc_store_and_check_tsc_adjust(bool bootcpu); extern void tsc_verify_tsc_adjust(bool resume); -extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_target(void); #else static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; } static inline void tsc_verify_tsc_adjust(bool resume) { } -static inline void check_tsc_sync_source(int cpu) { } static inline void check_tsc_sync_target(void) { } #endif diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8b07c6e6d620..b2f44a837ce1 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -275,11 +275,7 @@ static void notrace start_secondary(void *unused) */ smp_callin(); - /* - * Check TSC synchronization with the control CPU, which will do - * its part of this from wait_cpu_online(), making it an implicit - * synchronization point. - */ + /* Check TSC synchronization with the control CPU. 
*/ check_tsc_sync_target(); /* @@ -1141,21 +1137,11 @@ static void wait_cpu_callin(unsigned int cpu) } /* - * Bringup step four: Synchronize the TSC and wait for the target AP - * to reach set_cpu_online() in start_secondary(). + * Bringup step four: Wait for the target AP to reach set_cpu_online() in + * start_secondary(). */ static void wait_cpu_online(unsigned int cpu) { - unsigned long flags; - - /* - * Check TSC synchronization with the AP (keep irqs disabled - * while doing so): - */ - local_irq_save(flags); - check_tsc_sync_source(cpu); - local_irq_restore(flags); - /* * Wait for the AP to mark itself online, so the core caller * can drop sparse_irq_lock. diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 9452dc9664b5..bbc440c93e08 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -245,7 +245,6 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) */ static atomic_t start_count; static atomic_t stop_count; -static atomic_t skip_test; static atomic_t test_runs; /* @@ -344,20 +343,13 @@ static inline unsigned int loop_timeout(int cpu) } /* - * Source CPU calls into this - it waits for the freshly booted - * target CPU to arrive and then starts the measurement: + * The freshly booted CPU initiates this via an async SMP function call. */ -void check_tsc_sync_source(int cpu) +static void check_tsc_sync_source(void *__cpu) { + unsigned int cpu = (unsigned long)__cpu; int cpus = 2; - /* - * No need to check if we already know that the TSC is not - * synchronized or if we have no TSC. - */ - if (unsynchronized_tsc()) - return; - /* * Set the maximum number of test runs to * 1 if the CPU does not provide the TSC_ADJUST MSR @@ -368,16 +360,9 @@ void check_tsc_sync_source(int cpu) else atomic_set(&test_runs, 3); retry: - /* - * Wait for the target to start or to skip the test: - */ - while (atomic_read(&start_count) != cpus - 1) { - if (atomic_read(&skip_test) > 0) { - atomic_set(&skip_test, 0); - return; - } + /* Wait for the target to start. */ + while (atomic_read(&start_count) != cpus - 1) cpu_relax(); - } /* * Trigger the target to continue into the measurement too: @@ -397,14 +382,14 @@ retry: if (!nr_warps) { atomic_set(&test_runs, 0); - pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", + pr_debug("TSC synchronization [CPU#%d -> CPU#%u]: passed\n", smp_processor_id(), cpu); } else if (atomic_dec_and_test(&test_runs) || random_warps) { /* Force it to 0 if random warps brought us here */ atomic_set(&test_runs, 0); - pr_warn("TSC synchronization [CPU#%d -> CPU#%d]:\n", + pr_warn("TSC synchronization [CPU#%d -> CPU#%u]:\n", smp_processor_id(), cpu); pr_warn("Measured %Ld cycles TSC warp between CPUs, " "turning off TSC clock.\n", max_warp); @@ -457,11 +442,12 @@ void check_tsc_sync_target(void) * SoCs the TSC is frequency synchronized, but still the TSC ADJUST * register might have been wreckaged by the BIOS.. 
*/ - if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) { - atomic_inc(&skip_test); + if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) return; - } + /* Kick the control CPU into the TSC synchronization function */ + smp_call_function_single(cpumask_first(cpu_online_mask), check_tsc_sync_source, + (unsigned long *)(unsigned long)cpu, 0); retry: /* * Register this CPU's participation and wait for the -- cgit From c8b7fb09d1bcfa61d4211f61542e1291e7b4cbad Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:19 +0200 Subject: x86/smpboot: Remove cpu_callin_mask Now that TSC synchronization is SMP function call based there is no reason to wait for the AP to be set in cpu_callin_mask. The control CPU waits for the AP to set itself in the online mask anyway. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.206394064@linutronix.de --- arch/x86/kernel/smpboot.c | 74 ++++++++++------------------------------------- 1 file changed, 15 insertions(+), 59 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b2f44a837ce1..3f731beb4d3f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -104,7 +104,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); /* All of these masks are initialized in setup_cpu_local_masks() */ static cpumask_var_t cpu_initialized_mask; static cpumask_var_t cpu_callout_mask; -static cpumask_var_t cpu_callin_mask; /* Representing CPUs for which sibling maps can be computed */ static cpumask_var_t cpu_sibling_setup_mask; @@ -161,38 +160,30 @@ static inline void smpboot_restore_warm_reset_vector(void) } -/* - * Report back to the Boot Processor during boot time or to the caller processor - * during CPU online. - */ -static void smp_callin(void) +/* Run the next set of setup steps for the upcoming CPU */ +static void ap_starting(void) { - int cpuid; - - /* - * If waken up by an INIT in an 82489DX configuration - * cpu_callout_mask guarantees we don't get here before - * an INIT_deassert IPI reaches our local APIC, so it is - * now safe to touch our local APIC. - */ - cpuid = smp_processor_id(); + int cpuid = smp_processor_id(); /* - * the boot CPU has finished the init stage and is spinning - * on callin_map until we finish. We are free to set up this - * CPU, first the APIC. (this is probably redundant on most - * boards) + * If woken up by an INIT in an 82489DX configuration + * cpu_callout_mask guarantees the CPU does not reach this point + * before an INIT_deassert IPI reaches the local APIC, so it is now + * safe to touch the local APIC. + * + * Set up this CPU, first the APIC, which is probably redundant on + * most boards. */ apic_ap_setup(); - /* Save our processor parameters. */ + /* Save the processor parameters. */ smp_store_cpu_info(cpuid); /* * The topology information must be up to date before * notify_cpu_starting(). */ - set_cpu_sibling_map(raw_smp_processor_id()); + set_cpu_sibling_map(cpuid); ap_init_aperfmperf(); @@ -205,11 +196,6 @@ static void smp_callin(void) * state CPUHP_ONLINE. */ notify_cpu_starting(cpuid); - - /* - * Allow the master to continue.
- */ - cpumask_set_cpu(cpuid, cpu_callin_mask); } static void ap_calibrate_delay(void) @@ -268,12 +254,7 @@ static void notrace start_secondary(void *unused) rcu_cpu_starting(raw_smp_processor_id()); x86_cpuinit.early_percpu_clock_init(); - /* - * Sync point with wait_cpu_callin(). The AP doesn't wait here - * but just sets the bit to let the controlling CPU (BSP) know that - * it's got this far. - */ - smp_callin(); + ap_starting(); /* Check TSC synchronization with the control CPU. */ check_tsc_sync_target(); @@ -1109,7 +1090,7 @@ static int wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask) * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it * to proceed. The AP will then proceed past setting its 'callin' bit * and end up waiting in check_tsc_sync_target() until we reach - * do_wait_cpu_online() to tend to it. + * wait_cpu_online() to tend to it. */ static int wait_cpu_initialized(unsigned int cpu) { @@ -1124,20 +1105,7 @@ static int wait_cpu_initialized(unsigned int cpu) } /* - * Bringup step three: Wait for the target AP to reach smp_callin(). - * The AP is not waiting for us here so we don't need to parallelise - * this step. Not entirely clear why we care about this, since we just - * proceed directly to TSC synchronization which is the next sync - * point with the AP anyway. - */ -static void wait_cpu_callin(unsigned int cpu) -{ - while (!cpumask_test_cpu(cpu, cpu_callin_mask)) - schedule(); -} - -/* - * Bringup step four: Wait for the target AP to reach set_cpu_online() in + * Bringup step three: Wait for the target AP to reach set_cpu_online() in * start_secondary(). */ static void wait_cpu_online(unsigned int cpu) @@ -1166,14 +1134,6 @@ static int native_kick_ap(unsigned int cpu, struct task_struct *tidle) return -EINVAL; } - /* - * Already booted CPU? - */ - if (cpumask_test_cpu(cpu, cpu_callin_mask)) { - pr_debug("do_boot_cpu %d Already started\n", cpu); - return -ENOSYS; - } - /* * Save current MTRR state in case it was changed since early boot * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: @@ -1211,7 +1171,6 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) if (ret) goto out; - wait_cpu_callin(cpu); wait_cpu_online(cpu); out: @@ -1327,7 +1286,6 @@ void __init smp_prepare_cpus_common(void) * Setup boot CPU information */ smp_store_boot_cpu_info(); /* Final full version of the data */ - cpumask_copy(cpu_callin_mask, cpumask_of(0)); mb(); for_each_possible_cpu(i) { @@ -1542,7 +1500,6 @@ __init void prefill_possible_map(void) void __init setup_cpu_local_masks(void) { alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callin_mask); alloc_bootmem_cpumask_var(&cpu_callout_mask); alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); } @@ -1606,7 +1563,6 @@ static void remove_cpu_from_maps(int cpu) { set_cpu_online(cpu, false); cpumask_clear_cpu(cpu, cpu_callout_mask); - cpumask_clear_cpu(cpu, cpu_callin_mask); /* was set by cpu_init() */ cpumask_clear_cpu(cpu, cpu_initialized_mask); numa_remove_cpu(cpu); -- cgit From e464640cf7af12f3c2748065936b95eae1f735ba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:22 +0200 Subject: x86/smpboot: Remove wait for cpu_online() Now that the core code drops sparse_irq_lock after the idle thread synchronized, it's pointless to wait for the AP to mark itself online. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.316417181@linutronix.de --- arch/x86/kernel/smpboot.c | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3f731beb4d3f..4349e8ab2566 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -275,7 +275,6 @@ static void notrace start_secondary(void *unused) * half valid vector space. */ lock_vector_lock(); - /* Sync point with do_wait_cpu_online() */ set_cpu_online(smp_processor_id(), true); lapic_online(); unlock_vector_lock(); @@ -1104,20 +1103,6 @@ static int wait_cpu_initialized(unsigned int cpu) return 0; } -/* - * Bringup step three: Wait for the target AP to reach set_cpu_online() in - * start_secondary(). - */ -static void wait_cpu_online(unsigned int cpu) -{ - /* - * Wait for the AP to mark itself online, so the core caller - * can drop sparse_irq_lock. - */ - while (!cpu_online(cpu)) - schedule(); -} - static int native_kick_ap(unsigned int cpu, struct task_struct *tidle) { int apicid = apic->cpu_present_to_apicid(cpu); @@ -1164,16 +1149,9 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) int ret; ret = native_kick_ap(cpu, tidle); - if (ret) - goto out; - - ret = wait_cpu_initialized(cpu); - if (ret) - goto out; - - wait_cpu_online(cpu); + if (!ret) + ret = wait_cpu_initialized(cpu); -out: /* Cleanup possible dangling ends... */ if (x86_platform.legacy.warm_reset) smpboot_restore_warm_reset_vector(); -- cgit From 2de7fd26d90729c4e3430f9924b199dea810a9bc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:24 +0200 Subject: x86/xen/smp_pv: Remove wait for CPU online Now that the core code drops sparse_irq_lock after the idle thread synchronized, it's pointless to wait for the AP to mark itself online. Whether the control CPU runs in a wait loop or sleeps in the core code waiting for the online operation to complete makes no difference. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.369512093@linutronix.de --- arch/x86/xen/smp_pv.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index a6f9128a4091..be40927667cf 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -340,11 +340,11 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) xen_pmu_init(cpu); - rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL); - BUG_ON(rc); - - while (cpu_report_state(cpu) != CPU_ONLINE) - HYPERVISOR_sched_op(SCHEDOP_yield, NULL); + /* + * Why is this a BUG? If the hypercall fails then everything can be + * rolled back, no? + */ + BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)); return 0; } -- cgit From ab24eb9abb9c60c45119370731735b79ed79f36c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:25 +0200 Subject: x86/xen/hvm: Get rid of DEAD_FROZEN handling No point in this conditional voodoo. Un-initializing the lock mechanism is safe to be called unconditionally even if it was already invoked when the CPU died. 
Remove the invocation of xen_smp_intr_free() as that has already been cleaned up in xen_cpu_dead_hvm(). Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.423407127@linutronix.de --- arch/x86/xen/enlighten_hvm.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index c1cd28e915a3..a6820ca940bf 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -161,13 +161,12 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) int rc = 0; /* - * This can happen if CPU was offlined earlier and - * offlining timed out in common_cpu_die(). + * If a CPU was offlined earlier and offlining timed out then the + * lock mechanism is still initialized. Uninit it unconditionally + * as it's safe to call even if already uninited. Interrupts and + * timer have already been handled in xen_cpu_dead_hvm(). */ - if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) { - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - } + xen_uninit_lock_cpu(cpu); if (cpu_acpi_id(cpu) != U32_MAX) per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); -- cgit From 6f0621238b7e7680d5e26c00aa4cd473314d05b2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:27 +0200 Subject: cpu/hotplug: Add CPU state tracking and synchronization The CPU state tracking and synchronization mechanism in smpboot.c is completely independent of the hotplug code and all logic around it is implemented in architecture specific code. Except for the state reporting of the AP there is absolutely nothing architecture specific and the synchronization and decision functions can be moved into the generic hotplug core code. Provide an integrated variant and add the core synchronization and decision points. This comes in two flavours: 1) DEAD state synchronization Updated by the architecture code once the AP reaches the point where it is ready to be torn down by the control CPU, e.g. by removing power or clocks, or by tearing it down via the hypervisor. The control CPU waits for this state to be reached with a timeout. If the state is reached an architecture specific cleanup function is invoked. 2) Full state synchronization This extends #1 with AP alive synchronization. This is new functionality, which allows architecture specific wait mechanisms, e.g. cpumasks, to be replaced completely. It also prevents an AP which is in a limbo state from being brought up again. This can happen when an AP failed to report dead state during a previous off-line operation. The dead synchronization is what most architectures use. Only x86 makes a bringup decision based on that state at the moment. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.476305035@linutronix.de
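The generic half of this mechanism lands in kernel/cpu.c and is therefore not visible in the diffs of this series, which are limited to 'arch'. As a rough sketch of how such a synchronization point can be built: the public helpers below match the names used by the architecture patches in this series, while the state names and internals are simplified assumptions rather than the verbatim core implementation.

/* Condensed sketch of the core side (kernel/cpu.c); details simplified */
static DEFINE_PER_CPU(atomic_t, cpuhp_ap_sync_state);

enum cpuhp_sync_state { SYNC_STATE_DEAD, SYNC_STATE_KICKED, SYNC_STATE_ALIVE };

/* AP side: invoked late in the teardown path, e.g. arch_cpu_idle_dead() */
void cpuhp_ap_report_dead(void)
{
	atomic_set(this_cpu_ptr(&cpuhp_ap_sync_state), SYNC_STATE_DEAD);
}

/* Control CPU side: wait with a timeout until the AP reported dead */
void cpuhp_bp_sync_dead(unsigned int cpu)
{
	atomic_t *st = per_cpu_ptr(&cpuhp_ap_sync_state, cpu);
	unsigned long timeout = jiffies + 5 * HZ;

	while (atomic_read(st) != SYNC_STATE_DEAD) {
		if (time_after(jiffies, timeout)) {
			pr_err("CPU%u failed to report dead state\n", cpu);
			return;
		}
		/* Let paravirt environments yield, cf. poll_sync_state */
		arch_cpuhp_sync_state_poll();
		cpu_relax();
	}
	/* DEAD state reached: invoke the architecture specific cleanup */
	arch_cpuhp_cleanup_dead_cpu(cpu);
}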
--- arch/Kconfig | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 205fd23e0cad..f55c5fcbea38 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -34,6 +34,21 @@ config ARCH_HAS_SUBPAGE_FAULTS config HOTPLUG_SMT bool +# Selected by HOTPLUG_CORE_SYNC_DEAD or HOTPLUG_CORE_SYNC_FULL +config HOTPLUG_CORE_SYNC + bool + +# Basic CPU dead synchronization selected by architecture +config HOTPLUG_CORE_SYNC_DEAD + bool + select HOTPLUG_CORE_SYNC + +# Full CPU synchronization with alive state selected by architecture +config HOTPLUG_CORE_SYNC_FULL + bool + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU + select HOTPLUG_CORE_SYNC + config GENERIC_ENTRY bool -- cgit From 2711b8e2b71fa3cce9c28b0f07b75882f141adfe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:29 +0200 Subject: x86/smpboot: Switch to hotplug core state synchronization The new AP state tracking and synchronization mechanism in the CPU hotplug core code allows quite some x86-specific code to be removed: 1) The AP alive synchronization based on cpumasks 2) The decision whether an AP can be brought up again Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.529657366@linutronix.de --- arch/x86/Kconfig | 1 + arch/x86/include/asm/smp.h | 7 +- arch/x86/kernel/smp.c | 1 - arch/x86/kernel/smpboot.c | 165 +++++++++++---------------------------------- arch/x86/xen/smp_hvm.c | 16 ++--- arch/x86/xen/smp_pv.c | 39 ++++++----- 6 files changed, 75 insertions(+), 154 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d0191338a102..adb1ec8503d0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -274,6 +274,7 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER select HAVE_GENERIC_VDSO + select HOTPLUG_CORE_SYNC_FULL if SMP select HOTPLUG_SMT if SMP select IRQ_FORCED_THREADING select NEED_PER_CPU_EMBED_FIRST_CHUNK diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 0ec4c64a1981..693c9997c069 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -38,6 +38,8 @@ struct smp_ops { void (*crash_stop_other_cpus)(void); void (*smp_send_reschedule)(int cpu); + void (*cleanup_dead_cpu)(unsigned cpu); + void (*poll_sync_state)(void); int (*cpu_up)(unsigned cpu, struct task_struct *tidle); int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); @@ -90,7 +92,8 @@ static inline int __cpu_disable(void) static inline void __cpu_die(unsigned int cpu) { - smp_ops.cpu_die(cpu); + if (smp_ops.cpu_die) + smp_ops.cpu_die(cpu); } static inline void __noreturn play_dead(void) @@ -123,8 +126,6 @@ void native_smp_cpus_done(unsigned int max_cpus); int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_disable(void); -int common_cpu_die(unsigned int cpu); -void native_cpu_die(unsigned int cpu); void __noreturn hlt_play_dead(void); void native_play_dead(void); void play_dead_common(void); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 375b33ecafa2..4f6375b4ba9a 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -269,7 +269,6 @@ struct smp_ops smp_ops = {
.smp_send_reschedule = native_smp_send_reschedule, .cpu_up = native_cpu_up, - .cpu_die = native_cpu_die, .cpu_disable = native_cpu_disable, .play_dead = native_play_dead, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4349e8ab2566..59386419b4b1 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -101,9 +102,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map); DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); -/* All of these masks are initialized in setup_cpu_local_masks() */ -static cpumask_var_t cpu_initialized_mask; -static cpumask_var_t cpu_callout_mask; /* Representing CPUs for which sibling maps can be computed */ static cpumask_var_t cpu_sibling_setup_mask; @@ -166,10 +164,10 @@ static void ap_starting(void) int cpuid = smp_processor_id(); /* - * If woken up by an INIT in an 82489DX configuration - * cpu_callout_mask guarantees the CPU does not reach this point - * before an INIT_deassert IPI reaches the local APIC, so it is now - * safe to touch the local APIC. + * If woken up by an INIT in an 82489DX configuration the alive + * synchronization guarantees that the CPU does not reach this + * point before an INIT_deassert IPI reaches the local APIC, so it + * is now safe to touch the local APIC. * * Set up this CPU, first the APIC, which is probably redundant on * most boards. @@ -213,17 +211,6 @@ static void ap_calibrate_delay(void) cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy; } -static void wait_for_master_cpu(int cpu) -{ - /* - * Wait for release by control CPU before continuing with AP - * initialization. - */ - WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)); - while (!cpumask_test_cpu(cpu, cpu_callout_mask)) - cpu_relax(); -} - /* * Activate a secondary processor. */ @@ -244,11 +231,11 @@ static void notrace start_secondary(void *unused) cpu_init_exception_handling(); /* - * Sync point with wait_cpu_initialized(). Sets AP in - * cpu_initialized_mask and then waits for the control CPU - * to release it. + * Synchronization point with the hotplug core. Sets the + * synchronization state to ALIVE and waits for the control CPU to + * release this CPU for further bringup. */ - wait_for_master_cpu(raw_smp_processor_id()); + cpuhp_ap_sync_alive(); cpu_init(); rcu_cpu_starting(raw_smp_processor_id()); @@ -278,7 +265,6 @@ static void notrace start_secondary(void *unused) set_cpu_online(smp_processor_id(), true); lapic_online(); unlock_vector_lock(); - cpu_set_state_online(smp_processor_id()); x86_platform.nmi_init(); /* enable local interrupts */ @@ -729,9 +715,9 @@ static void impress_friends(void) * Allow the user to impress friends. 
*/ pr_debug("Before bogomips\n"); - for_each_possible_cpu(cpu) - if (cpumask_test_cpu(cpu, cpu_callout_mask)) - bogosum += cpu_data(cpu).loops_per_jiffy; + for_each_online_cpu(cpu) + bogosum += cpu_data(cpu).loops_per_jiffy; + pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n", num_online_cpus(), bogosum/(500000/HZ), @@ -1003,6 +989,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) { unsigned long start_ip = real_mode_header->trampoline_start; + int ret; #ifdef CONFIG_X86_64 /* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */ @@ -1043,13 +1030,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) } } - /* - * AP might wait on cpu_callout_mask in cpu_init() with - * cpu_initialized_mask set if previous attempt to online - * it timed-out. Clear cpu_initialized_mask so that after - * INIT/SIPI it could start with a clean state. - */ - cpumask_clear_cpu(cpu, cpu_initialized_mask); smp_mb(); /* @@ -1060,47 +1040,16 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) * - Use an INIT boot APIC message */ if (apic->wakeup_secondary_cpu_64) - return apic->wakeup_secondary_cpu_64(apicid, start_ip); + ret = apic->wakeup_secondary_cpu_64(apicid, start_ip); else if (apic->wakeup_secondary_cpu) - return apic->wakeup_secondary_cpu(apicid, start_ip); - - return wakeup_secondary_cpu_via_init(apicid, start_ip); -} - -static int wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask) -{ - unsigned long timeout; - - /* - * Wait up to 10s for the CPU to report in. - */ - timeout = jiffies + 10*HZ; - while (time_before(jiffies, timeout)) { - if (cpumask_test_cpu(cpu, mask)) - return 0; - - schedule(); - } - return -1; -} - -/* - * Bringup step two: Wait for the target AP to reach cpu_init_secondary() - * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it - * to proceed. The AP will then proceed past setting its 'callin' bit - * and end up waiting in check_tsc_sync_target() until we reach - * wait_cpu_online() to tend to it. - */ -static int wait_cpu_initialized(unsigned int cpu) -{ - /* - * Wait for first sign of life from AP. - */ - if (wait_cpu_cpumask(cpu, cpu_initialized_mask)) - return -1; + ret = apic->wakeup_secondary_cpu(apicid, start_ip); + else + ret = wakeup_secondary_cpu_via_init(apicid, start_ip); - cpumask_set_cpu(cpu, cpu_callout_mask); - return 0; + /* If the wakeup mechanism failed, cleanup the warm reset vector */ + if (ret) + arch_cpuhp_cleanup_kick_cpu(cpu); + return ret; } static int native_kick_ap(unsigned int cpu, struct task_struct *tidle) @@ -1125,11 +1074,6 @@ static int native_kick_ap(unsigned int cpu, struct task_struct *tidle) */ mtrr_save_state(); - /* x86 CPUs take themselves offline, so delayed offline is OK. */ - err = cpu_check_up_prepare(cpu); - if (err && err != -EBUSY) - return err; - /* the FPU context is blank, nobody can own it */ per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; @@ -1146,17 +1090,29 @@ static int native_kick_ap(unsigned int cpu, struct task_struct *tidle) int native_cpu_up(unsigned int cpu, struct task_struct *tidle) { - int ret; - - ret = native_kick_ap(cpu, tidle); - if (!ret) - ret = wait_cpu_initialized(cpu); + return native_kick_ap(cpu, tidle); +} +void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) +{ /* Cleanup possible dangling ends... 
*/ - if (x86_platform.legacy.warm_reset) + if (smp_ops.cpu_up == native_cpu_up && x86_platform.legacy.warm_reset) smpboot_restore_warm_reset_vector(); +} - return ret; +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) +{ + if (smp_ops.cleanup_dead_cpu) + smp_ops.cleanup_dead_cpu(cpu); + + if (system_state == SYSTEM_RUNNING) + pr_info("CPU %u is now offline\n", cpu); +} + +void arch_cpuhp_sync_state_poll(void) +{ + if (smp_ops.poll_sync_state) + smp_ops.poll_sync_state(); } /** @@ -1348,9 +1304,6 @@ void __init native_smp_prepare_boot_cpu(void) if (!IS_ENABLED(CONFIG_SMP)) switch_gdt_and_percpu_base(me); - /* already set me in cpu_online_mask in boot_cpu_init() */ - cpumask_set_cpu(me, cpu_callout_mask); - cpu_set_state_online(me); native_pv_lock_init(); } @@ -1477,8 +1430,6 @@ __init void prefill_possible_map(void) /* correctly size the local cpu masks */ void __init setup_cpu_local_masks(void) { - alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callout_mask); alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); } @@ -1540,9 +1491,6 @@ static void remove_siblinginfo(int cpu) static void remove_cpu_from_maps(int cpu) { set_cpu_online(cpu, false); - cpumask_clear_cpu(cpu, cpu_callout_mask); - /* was set by cpu_init() */ - cpumask_clear_cpu(cpu, cpu_initialized_mask); numa_remove_cpu(cpu); } @@ -1593,36 +1541,11 @@ int native_cpu_disable(void) return 0; } -int common_cpu_die(unsigned int cpu) -{ - int ret = 0; - - /* We don't do anything here: idle task is faking death itself. */ - - /* They ack this in play_dead() by setting CPU_DEAD */ - if (cpu_wait_death(cpu, 5)) { - if (system_state == SYSTEM_RUNNING) - pr_info("CPU %u is now offline\n", cpu); - } else { - pr_err("CPU %u didn't die...\n", cpu); - ret = -1; - } - - return ret; -} - -void native_cpu_die(unsigned int cpu) -{ - common_cpu_die(cpu); -} - void play_dead_common(void) { idle_task_exit(); - /* Ack it */ - (void)cpu_report_death(); - + cpuhp_ap_report_dead(); /* * With physical CPU hotplug, we should halt the cpu */ @@ -1724,12 +1647,6 @@ int native_cpu_disable(void) return -ENOSYS; } -void native_cpu_die(unsigned int cpu) -{ - /* We said "no" in __cpu_disable */ - BUG(); -} - void native_play_dead(void) { BUG(); diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index b70afdff419c..ac95d1981cc0 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -55,18 +55,16 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) } #ifdef CONFIG_HOTPLUG_CPU -static void xen_hvm_cpu_die(unsigned int cpu) +static void xen_hvm_cleanup_dead_cpu(unsigned int cpu) { - if (common_cpu_die(cpu) == 0) { - if (xen_have_vector_callback) { - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - xen_teardown_timer(cpu); - } + if (xen_have_vector_callback) { + xen_smp_intr_free(cpu); + xen_uninit_lock_cpu(cpu); + xen_teardown_timer(cpu); } } #else -static void xen_hvm_cpu_die(unsigned int cpu) +static void xen_hvm_cleanup_dead_cpu(unsigned int cpu) { BUG(); } @@ -77,7 +75,7 @@ void __init xen_hvm_smp_init(void) smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; smp_ops.smp_cpus_done = xen_smp_cpus_done; - smp_ops.cpu_die = xen_hvm_cpu_die; + smp_ops.cleanup_dead_cpu = xen_hvm_cleanup_dead_cpu; if (!xen_have_vector_callback) { #ifdef CONFIG_PARAVIRT_SPINLOCKS diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index be40927667cf..b089fb1cefc6 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -62,6 
+62,7 @@ static void cpu_bringup(void) int cpu; cr4_init(); + cpuhp_ap_sync_alive(); cpu_init(); touch_softlockup_watchdog(); @@ -83,7 +84,7 @@ static void cpu_bringup(void) set_cpu_online(cpu, true); - cpu_set_state_online(cpu); /* Implies full memory barrier. */ + smp_mb(); /* We can take interrupts now: we're officially "up". */ local_irq_enable(); @@ -323,14 +324,6 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) xen_setup_runstate_info(cpu); - /* - * PV VCPUs are always successfully taken down (see 'while' loop - * in xen_cpu_die()), so -EBUSY is an error. - */ - rc = cpu_check_up_prepare(cpu); - if (rc) - return rc; - /* make sure interrupts start blocked */ per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; @@ -349,6 +342,11 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) return 0; } +static void xen_pv_poll_sync_state(void) +{ + HYPERVISOR_sched_op(SCHEDOP_yield, NULL); +} + #ifdef CONFIG_HOTPLUG_CPU static int xen_pv_cpu_disable(void) { @@ -364,18 +362,18 @@ static int xen_pv_cpu_disable(void) static void xen_pv_cpu_die(unsigned int cpu) { - while (HYPERVISOR_vcpu_op(VCPUOP_is_up, - xen_vcpu_nr(cpu), NULL)) { + while (HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), NULL)) { __set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(HZ/10); } +} - if (common_cpu_die(cpu) == 0) { - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - xen_teardown_timer(cpu); - xen_pmu_finish(cpu); - } +static void xen_pv_cleanup_dead_cpu(unsigned int cpu) +{ + xen_smp_intr_free(cpu); + xen_uninit_lock_cpu(cpu); + xen_teardown_timer(cpu); + xen_pmu_finish(cpu); } static void __noreturn xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */ @@ -397,6 +395,11 @@ static void xen_pv_cpu_die(unsigned int cpu) BUG(); } +static void xen_pv_cleanup_dead_cpu(unsigned int cpu) +{ + BUG(); +} + static void __noreturn xen_pv_play_dead(void) { BUG(); } @@ -437,6 +440,8 @@ static const struct smp_ops xen_smp_ops __initconst = { .cpu_up = xen_pv_cpu_up, .cpu_die = xen_pv_cpu_die, + .cleanup_dead_cpu = xen_pv_cleanup_dead_cpu, + .poll_sync_state = xen_pv_poll_sync_state, .cpu_disable = xen_pv_cpu_disable, .play_dead = xen_pv_play_dead, -- cgit From 5490e769cdc7230cb93e804e656cce19d6c82253 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:32 +0200 Subject: ARM: smp: Switch to hotplug core state synchronization Switch to the CPU hotplug core state tracking and synchronization mechanism. No functional change intended. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.635326070@linutronix.de
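This and the following architecture conversions all follow the same pattern, so a single sketch covers them: an architecture selecting HOTPLUG_CORE_SYNC_DEAD essentially provides two hooks. The function names are taken from this series; the bodies and the hypothetical arch_halt_this_cpu() are illustrative only.

/* AP side: the final steps of the dying CPU */
void __noreturn arch_cpu_idle_dead(void)
{
	idle_task_exit();
	/* Tell the control CPU that this CPU can now be disposed of */
	cpuhp_ap_report_dead();
	/* Architecture specific power/clock removal, must not return */
	arch_halt_this_cpu();	/* hypothetical placeholder */
	BUG();
}

/* Control CPU side: invoked by the core once DEAD state was observed */
void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
{
	pr_debug("CPU%u: shutdown\n", cpu);
	/* Architecture specific cleanup, e.g. cache maintenance or a
	 * firmware check that the CPU really stopped */
}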
--- arch/arm/Kconfig | 1 + arch/arm/include/asm/smp.h | 2 +- arch/arm/kernel/smp.c | 18 +++++++----------- 3 files changed, 9 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 0fb4b218f665..775ce86507d7 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -124,6 +124,7 @@ config ARM select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 select HAVE_VIRT_CPU_ACCOUNTING_GEN + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select IRQ_FORCED_THREADING select MODULES_USE_ELF_REL select NEED_DMA_MAP_STATE diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 7c1c90d9f582..8c05a7f374d8 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -64,7 +64,7 @@ extern void secondary_startup_arm(void); extern int __cpu_disable(void); -extern void __cpu_die(unsigned int cpu); +static inline void __cpu_die(unsigned int cpu) { } extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 87f8d0e5e314..6756203e45f3 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -288,15 +288,11 @@ int __cpu_disable(void) } /* - * called on the thread which is asking for a CPU to be shutdown - - * waits until shutdown has completed, or it is timed out. + * called on the thread which is asking for a CPU to be shutdown after the + * shutdown completed. */ -void __cpu_die(unsigned int cpu) +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { - if (!cpu_wait_death(cpu, 5)) { - pr_err("CPU%u: cpu didn't die\n", cpu); - return; - } pr_debug("CPU%u: shutdown\n", cpu); clear_tasks_mm_cpumask(cpu); @@ -336,11 +332,11 @@ void __noreturn arch_cpu_idle_dead(void) flush_cache_louis(); /* - * Tell __cpu_die() that this CPU is now safe to dispose of. Once - * this returns, power and/or clocks can be removed at any point - * from this CPU and its cache by platform_cpu_kill(). + * Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose + * of. Once this returns, power and/or clocks can be removed at + * any point from this CPU and its cache by platform_cpu_kill(). */ - (void)cpu_report_death(); + cpuhp_ap_report_dead(); /* * Ensure that the cache lines associated with that completion are -- cgit From b3091f172fed63ee59d1746f088bdcc76a79a79c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:33 +0200 Subject: arm64: smp: Switch to hotplug core state synchronization Switch to the CPU hotplug core state tracking and synchronization mechanism. No functional change intended. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Mark Rutland Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G.
Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.690926018@linutronix.de --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/smp.h | 2 +- arch/arm64/kernel/smp.c | 14 +++++--------- 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b1201d25a8a4..fcb945bde648 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -222,6 +222,7 @@ config ARM64 select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_GENERIC_VDSO + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select IRQ_DOMAIN select IRQ_FORCED_THREADING select KASAN_VMALLOC if KASAN diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index f2d26235bfb4..9b31e6d0da17 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -99,7 +99,7 @@ static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask) extern int __cpu_disable(void); -extern void __cpu_die(unsigned int cpu); +static inline void __cpu_die(unsigned int cpu) { } extern void __noreturn cpu_die(void); extern void __noreturn cpu_die_early(void); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d00d4cbb31b1..edd63894d61e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -332,17 +332,13 @@ static int op_cpu_kill(unsigned int cpu) } /* - * called on the thread which is asking for a CPU to be shutdown - - * waits until shutdown has completed, or it is timed out. + * Called on the thread which is asking for a CPU to be shutdown after the + * shutdown completed. */ -void __cpu_die(unsigned int cpu) +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { int err; - if (!cpu_wait_death(cpu, 5)) { - pr_crit("CPU%u: cpu didn't die\n", cpu); - return; - } pr_debug("CPU%u: shutdown\n", cpu); /* @@ -369,8 +365,8 @@ void __noreturn cpu_die(void) local_daif_mask(); - /* Tell __cpu_die() that this CPU is now safe to dispose of */ - (void)cpu_report_death(); + /* Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose of */ + cpuhp_ap_report_dead(); /* * Actually shutdown the CPU. This must never fail. The specific hotplug -- cgit From 7202e979645715318f2f21a324cb8a506e12fa76 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:35 +0200 Subject: csky/smp: Switch to hotplug core state synchronization Switch to the CPU hotplug core state tracking and synchronization mechanism. No functional change intended. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G.
Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.747254502@linutronix.de --- arch/csky/Kconfig | 1 + arch/csky/include/asm/smp.h | 2 +- arch/csky/kernel/smp.c | 8 ++------ 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 4df1f8c9d170..95f1e9bfd1c7 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -96,6 +96,7 @@ config CSKY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select MAY_HAVE_SPARSE_IRQ select MODULES_USE_ELF_RELA if MODULES select OF diff --git a/arch/csky/include/asm/smp.h b/arch/csky/include/asm/smp.h index 668b79ce29ea..d3db334f3196 100644 --- a/arch/csky/include/asm/smp.h +++ b/arch/csky/include/asm/smp.h @@ -23,7 +23,7 @@ void __init set_send_ipi(void (*func)(const struct cpumask *mask), int irq); int __cpu_disable(void); -void __cpu_die(unsigned int cpu); +static inline void __cpu_die(unsigned int cpu) { } #endif /* CONFIG_SMP */ diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index b12e2c3c387f..8e42352cbf12 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -291,12 +291,8 @@ int __cpu_disable(void) return 0; } -void __cpu_die(unsigned int cpu) +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { - if (!cpu_wait_death(cpu, 5)) { - pr_crit("CPU%u: shutdown failed\n", cpu); - return; - } pr_notice("CPU%u: shutdown\n", cpu); } @@ -304,7 +300,7 @@ void __noreturn arch_cpu_idle_dead(void) { idle_task_exit(); - cpu_report_death(); + cpuhp_ap_report_dead(); while (!secondary_stack) arch_cpu_idle(); -- cgit From c8d2bcc467c8a1a85983c24e0331cf19fe94668f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:37 +0200 Subject: MIPS: SMP_CPS: Switch to hotplug core state synchronization Switch to the CPU hotplug core state tracking and synchronization mechanism. This unfortunately requires adding dead reporting to the non-CPS platforms as CPS is the only user, but it allows an overall consolidation of this functionality. No functional change intended. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G.
Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.803238859@linutronix.de --- arch/mips/Kconfig | 1 + arch/mips/cavium-octeon/smp.c | 1 + arch/mips/include/asm/smp-ops.h | 1 + arch/mips/kernel/smp-bmips.c | 1 + arch/mips/kernel/smp-cps.c | 14 +++++--------- arch/mips/kernel/smp.c | 8 ++++++++ arch/mips/loongson64/smp.c | 1 + 7 files changed, 18 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index c2f5498d207f..30e90a2d53f4 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2285,6 +2285,7 @@ config MIPS_CPS select MIPS_CM select MIPS_CPS_PM if HOTPLUG_CPU select SMP + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select SYNC_R4K if (CEVT_R4K || CSRC_R4K) select SYS_SUPPORTS_HOTPLUG_CPU select SYS_SUPPORTS_SCHED_SMT if CPU_MIPSR6 diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4212584e6efa..33c09688210f 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -345,6 +345,7 @@ void play_dead(void) int cpu = cpu_number_map(cvmx_get_core_num()); idle_task_exit(); + cpuhp_ap_report_dead(); octeon_processor_boot = 0xff; per_cpu(cpu_state, cpu) = CPU_DEAD; diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index 0145bbfb5efb..5719ff49eff1 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h @@ -33,6 +33,7 @@ struct plat_smp_ops { #ifdef CONFIG_HOTPLUG_CPU int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); + void (*cleanup_dead_cpu)(unsigned cpu); #endif #ifdef CONFIG_KEXEC void (*kexec_nonboot_cpu)(void); diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 15466d4cf4a0..c074ecce3fbf 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -392,6 +392,7 @@ static void bmips_cpu_die(unsigned int cpu) void __ref play_dead(void) { idle_task_exit(); + cpuhp_ap_report_dead(); /* flush data cache */ _dma_cache_wback_inv(0, ~0); diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 62f677b2306f..d7fdbec232da 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -503,8 +503,7 @@ void play_dead(void) } } - /* This CPU has chosen its way out */ - (void)cpu_report_death(); + cpuhp_ap_report_dead(); cps_shutdown_this_cpu(cpu_death); @@ -527,7 +526,9 @@ static void wait_for_sibling_halt(void *ptr_cpu) } while (!(halted & TCHALT_H)); } -static void cps_cpu_die(unsigned int cpu) +static void cps_cpu_die(unsigned int cpu) { } + +static void cps_cleanup_dead_cpu(unsigned cpu) { unsigned core = cpu_core(&cpu_data[cpu]); unsigned int vpe_id = cpu_vpe_id(&cpu_data[cpu]); @@ -535,12 +536,6 @@ static void cps_cpu_die(unsigned int cpu) unsigned stat; int err; - /* Wait for the cpu to choose its way out */ - if (!cpu_wait_death(cpu, 5)) { - pr_err("CPU%u: didn't offline\n", cpu); - return; - } - /* * Now wait for the CPU to actually offline. 
Without doing this that * offlining may race with one or more of: @@ -624,6 +619,7 @@ static const struct plat_smp_ops cps_smp_ops = { #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = cps_cpu_disable, .cpu_die = cps_cpu_die, + .cleanup_dead_cpu = cps_cleanup_dead_cpu, #endif #ifdef CONFIG_KEXEC .kexec_nonboot_cpu = cps_kexec_nonboot_cpu, diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 1d93b85271ba..90c71d800b59 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -690,6 +690,14 @@ void flush_tlb_one(unsigned long vaddr) EXPORT_SYMBOL(flush_tlb_page); EXPORT_SYMBOL(flush_tlb_one); +#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) +{ + if (mp_ops->cleanup_dead_cpu) + mp_ops->cleanup_dead_cpu(cpu); +} +#endif + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST static void tick_broadcast_callee(void *info) diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c index b0e8bb9fa036..cdecd7af11a6 100644 --- a/arch/mips/loongson64/smp.c +++ b/arch/mips/loongson64/smp.c @@ -775,6 +775,7 @@ void play_dead(void) void (*play_dead_at_ckseg1)(int *); idle_task_exit(); + cpuhp_ap_report_dead(); prid_imp = read_c0_prid() & PRID_IMP_MASK; prid_rev = read_c0_prid() & PRID_REV_MASK; -- cgit From 51e0efe1f26ec900ad80025b9faa2bec9575f1b2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:38 +0200 Subject: parisc: Switch to hotplug core state synchronization Switch to the CPU hotplug core state tracking and synchronization mechanism. No functional change intended. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.859920443@linutronix.de --- arch/parisc/Kconfig | 1 + arch/parisc/kernel/process.c | 4 ++-- arch/parisc/kernel/smp.c | 7 +++---- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 466a25525364..67a3f98a2c5e 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -57,6 +57,7 @@ config PARISC select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_REGS_AND_STACK_ACCESS_API + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select GENERIC_SCHED_CLOCK select GENERIC_IRQ_MIGRATION if SMP select HAVE_UNSTABLE_SCHED_CLOCK if SMP diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 97c6f875bd0e..66f6543417b7 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -166,8 +166,8 @@ void __noreturn arch_cpu_idle_dead(void) local_irq_disable(); - /* Tell __cpu_die() that this CPU is now safe to dispose of. */ - (void)cpu_report_death(); + /* Tell the core that this CPU is now safe to dispose of. */ + cpuhp_ap_report_dead(); /* Ensure that the cache lines are written out.
*/ flush_cache_all_local(); diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index b7fc859fa87d..6b6eaa485946 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -500,11 +500,10 @@ int __cpu_disable(void) void __cpu_die(unsigned int cpu) { pdc_cpu_rendezvous_lock(); +} - if (!cpu_wait_death(cpu, 5)) { - pr_crit("CPU%u: cpu didn't die\n", cpu); - return; - } +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) +{ pr_info("CPU%u: is shutting down\n", cpu); /* set task's state to interruptible sleep */ -- cgit From 72b11aa7f8f93449141544cecb21b2963416902d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:40 +0200 Subject: riscv: Switch to hotplug core state synchronization Switch to the CPU hotplug core state tracking and synchronization mechanism. No functional change intended. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Acked-by: Palmer Dabbelt Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.916055844@linutronix.de --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/smp.h | 2 +- arch/riscv/kernel/cpu-hotplug.c | 14 +++++++------- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 348c0fa1fc8c..13f058490608 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -122,6 +122,7 @@ config RISCV select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select IRQ_DOMAIN select IRQ_FORCED_THREADING select KASAN_VMALLOC if KASAN diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index c4b77017ec58..0d555847cde6 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -70,7 +70,7 @@ asmlinkage void smp_callin(void); #if defined CONFIG_HOTPLUG_CPU int __cpu_disable(void); -void __cpu_die(unsigned int cpu); +static inline void __cpu_die(unsigned int cpu) { } #endif /* CONFIG_HOTPLUG_CPU */ #else diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index a941adc7cbf2..457a18efcb11 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -49,17 +50,15 @@ int __cpu_disable(void) return ret; } +#ifdef CONFIG_HOTPLUG_CPU /* - * Called on the thread which is asking for a CPU to be shutdown. + * Called on the thread which is asking for a CPU to be shutdown, if the + * CPU reported dead to the hotplug core.
*/ -void __cpu_die(unsigned int cpu) +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { int ret = 0; - if (!cpu_wait_death(cpu, 5)) { - pr_err("CPU %u: didn't die\n", cpu); - return; - } pr_notice("CPU%u: off\n", cpu); /* Verify from the firmware if the cpu is really stopped*/ @@ -76,9 +75,10 @@ void __noreturn arch_cpu_idle_dead(void) { idle_task_exit(); - (void)cpu_report_death(); + cpuhp_ap_report_dead(); cpu_ops[smp_processor_id()]->cpu_stop(); /* It should never reach here */ BUG(); } +#endif -- cgit From a631be92b996c5db9b368e8b96305d22fb8c4180 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:45 +0200 Subject: cpu/hotplug: Provide a split up CPUHP_BRINGUP mechanism The bringup logic of a to be onlined CPU consists of several parts, which are considered to be a single hotplug state: 1) Control CPU issues the wake-up 2) To be onlined CPU starts up, does the minimal initialization, reports to be alive and waits for release into the complete bring-up. 3) Control CPU waits for the alive report and releases the upcoming CPU for the complete bring-up. Allow this to be split into two states: 1) Control CPU issues the wake-up After that the to be onlined CPU starts up, does the minimal initialization, reports to be alive and waits for release into the full bring-up. As this can run after the control CPU dropped the hotplug locks, the code which is executed on the AP before it reports alive has to be carefully audited to ensure that it does not violate any of the hotplug constraints, especially that it does not modify any of the various cpumasks. This is really only meant to avoid waiting for the AP to react to the wake-up. Of course an architecture can move strict CPU related setup functionality, e.g. microcode loading, with care before the synchronization point to save further pointless waiting time. 2) Control CPU waits for the alive report and releases the upcoming CPU for the complete bring-up. This allows the two states to be split up: run all to be onlined CPUs up to state #1 on the control CPU, then at a later point run state #2. This spares some of the latencies of the fully serialized per CPU bringup by avoiding the per CPU wakeup/wait serialization. The assumption is that the first AP already waits when the last AP has been woken up. This obviously depends on the hardware latencies, and depending on the timings this might still not completely eliminate all wait scenarios. This split is just a preparatory step for enabling the parallel bringup later. The boot time bringup is still fully serialized. It has a separate config switch so that architectures which want to support parallel bringup can test the split of the CPUHP_BRINGUP step separately. To enable this the architecture must support the CPU hotplug core sync mechanism and has to be audited to ensure that there are no implicit hotplug state dependencies which require a fully serialized bringup. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205257.080801387@linutronix.de
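The kernel/cpu.c side of this split is outside the arch-limited diffs. Roughly, the former single CPUHP_BRINGUP_CPU step decomposes as sketched below; arch_cpuhp_kick_ap_alive() and cpuhp_ap_sync_alive() are the interfaces visible in the x86 patches of this series, while cpuhp_bp_sync_alive() and the cpuhp_release_ap() release detail are simplified assumptions.

/* Step one, CPUHP_BP_KICK_AP: only issue the wake-up, do not wait */
static int cpuhp_kick_ap_alive(unsigned int cpu)
{
	return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu));
}

/* Step two, CPUHP_BRINGUP_CPU: wait for the alive report, then release */
static int cpuhp_bringup_ap(unsigned int cpu)
{
	/* The AP spins in cpuhp_ap_sync_alive() until released here */
	if (!cpuhp_bp_sync_alive(cpu))
		return -EIO;
	return cpuhp_release_ap(cpu);	/* illustrative release + wait */
}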
--- arch/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index f55c5fcbea38..d3015a61c148 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -49,6 +49,10 @@ config HOTPLUG_CORE_SYNC_FULL select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select HOTPLUG_CORE_SYNC +config HOTPLUG_SPLIT_STARTUP + bool + select HOTPLUG_CORE_SYNC_FULL + config GENERIC_ENTRY bool -- cgit From 8b5a0f957cc5ca1f68486163d4da4683be3b47aa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:46 +0200 Subject: x86/smpboot: Enable split CPU startup The x86 CPU bringup state currently does AP wake-up, wait for AP to respond and then release it for full bringup. It is safe to split this into a wake-up and a separate wait+release state. Provide the required functions and enable the split CPU bringup, which prepares for parallel bringup, where the bringup of the non-boot CPUs takes two iterations: one to prepare and wake all APs and a second to wait and release them. Depending on timing this can eliminate the wait time completely. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205257.133453992@linutronix.de --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/smp.h | 9 ++------- arch/x86/kernel/smp.c | 2 +- arch/x86/kernel/smpboot.c | 8 ++++---- arch/x86/xen/smp_pv.c | 4 ++-- 5 files changed, 10 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index adb1ec8503d0..c140a73b4295 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -274,8 +274,8 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER select HAVE_GENERIC_VDSO - select HOTPLUG_CORE_SYNC_FULL if SMP select HOTPLUG_SMT if SMP + select HOTPLUG_SPLIT_STARTUP if SMP select IRQ_FORCED_THREADING select NEED_PER_CPU_EMBED_FIRST_CHUNK select NEED_PER_CPU_PAGE_FIRST_CHUNK diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 693c9997c069..726c2a243eb0 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -40,7 +40,7 @@ struct smp_ops { void (*cleanup_dead_cpu)(unsigned cpu); void (*poll_sync_state)(void); - int (*cpu_up)(unsigned cpu, struct task_struct *tidle); + int (*kick_ap_alive)(unsigned cpu, struct task_struct *tidle); int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); @@ -80,11 +80,6 @@ static inline void smp_cpus_done(unsigned int max_cpus) smp_ops.smp_cpus_done(max_cpus); } -static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle) -{ - return smp_ops.cpu_up(cpu, tidle); -} - static inline int __cpu_disable(void) { return smp_ops.cpu_disable(); @@ -124,7 +119,7 @@ void native_smp_prepare_cpus(unsigned int max_cpus); void calculate_max_logical_packages(void); void native_smp_cpus_done(unsigned int max_cpus); int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); -int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); +int native_kick_ap(unsigned int cpu, struct task_struct *tidle); int native_cpu_disable(void); void __noreturn hlt_play_dead(void); void native_play_dead(void); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 4f6375b4ba9a..0d54115c3859 100644 --- a/arch/x86/kernel/smp.c +++ 
b/arch/x86/kernel/smp.c @@ -268,7 +268,7 @@ struct smp_ops smp_ops = { #endif .smp_send_reschedule = native_smp_send_reschedule, - .cpu_up = native_cpu_up, + .kick_ap_alive = native_kick_ap, .cpu_disable = native_cpu_disable, .play_dead = native_play_dead, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 59386419b4b1..a27941ac528c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1052,7 +1052,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) return ret; } -static int native_kick_ap(unsigned int cpu, struct task_struct *tidle) +int native_kick_ap(unsigned int cpu, struct task_struct *tidle) { int apicid = apic->cpu_present_to_apicid(cpu); int err; @@ -1088,15 +1088,15 @@ static int native_kick_ap(unsigned int cpu, struct task_struct *tidle) return err; } -int native_cpu_up(unsigned int cpu, struct task_struct *tidle) +int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle) { - return native_kick_ap(cpu, tidle); + return smp_ops.kick_ap_alive(cpu, tidle); } void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { /* Cleanup possible dangling ends... */ - if (smp_ops.cpu_up == native_cpu_up && x86_platform.legacy.warm_reset) + if (smp_ops.kick_ap_alive == native_kick_ap && x86_platform.legacy.warm_reset) smpboot_restore_warm_reset_vector(); } diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index b089fb1cefc6..a92e8002b5cf 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -314,7 +314,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) return 0; } -static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) +static int xen_pv_kick_ap(unsigned int cpu, struct task_struct *idle) { int rc; @@ -438,7 +438,7 @@ static const struct smp_ops xen_smp_ops __initconst = { .smp_prepare_cpus = xen_pv_smp_prepare_cpus, .smp_cpus_done = xen_smp_cpus_done, - .cpu_up = xen_pv_cpu_up, + .kick_ap_alive = xen_pv_kick_ap, .cpu_die = xen_pv_cpu_die, .cleanup_dead_cpu = xen_pv_cleanup_dead_cpu, .poll_sync_state = xen_pv_poll_sync_state, -- cgit From f54d4434c281f38b975d58de47adeca671beff4f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:48 +0200 Subject: x86/apic: Provide cpu_primary_thread mask Make the primary thread tracking CPU mask based in preparation for simpler handling of parallel bootup. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. 
Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205257.186599880@linutronix.de --- arch/x86/include/asm/apic.h | 2 -- arch/x86/include/asm/topology.h | 19 +++++++++++++++---- arch/x86/kernel/apic/apic.c | 20 +++++++++----------- arch/x86/kernel/smpboot.c | 12 +++--------- 4 files changed, 27 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index dc50ed7db447..030f5fb1daaf 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -506,10 +506,8 @@ extern int default_check_phys_apicid_present(int phys_apicid); #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_SMP -bool apic_id_is_primary_thread(unsigned int id); void apic_smt_update(void); #else -static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } static inline void apic_smt_update(void) { } #endif diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 2ba57588e937..caf41c4869a0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -31,9 +31,9 @@ * CONFIG_NUMA. */ #include +#include #ifdef CONFIG_NUMA -#include #include #include @@ -139,9 +139,20 @@ static inline int topology_max_smt_threads(void) int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); -bool topology_is_primary_thread(unsigned int cpu); bool topology_smt_supported(void); -#else + +extern struct cpumask __cpu_primary_thread_mask; +#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) + +/** + * topology_is_primary_thread - Check whether CPU is the primary SMT thread + * @cpu: CPU to check + */ +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_primary_thread_mask); +} +#else /* CONFIG_SMP */ #define topology_max_packages() (1) static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } @@ -152,7 +163,7 @@ static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } static inline bool topology_smt_supported(void) { return false; } -#endif +#endif /* !CONFIG_SMP */ static inline void arch_fix_phys_package_id(int num, u32 slot) { diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 770557110051..e17600d3e285 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2386,20 +2386,16 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) } #ifdef CONFIG_SMP -/** - * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread - * @apicid: APIC ID to check - */ -bool apic_id_is_primary_thread(unsigned int apicid) +static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { - u32 mask; - - if (smp_num_siblings == 1) - return true; /* Isolate the SMT bit(s) in the APICID and check for 0 */ - mask = (1U << (fls(smp_num_siblings) - 1)) - 1; - return !(apicid & mask); + u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1; + + if (smp_num_siblings == 1 || !(apicid & mask)) + cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); } +#else +static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } #endif /* @@ -2544,6 +2540,8 @@ int generic_processor_info(int apicid, int version) set_cpu_present(cpu, 
true); num_processors++; + cpu_mark_primary_thread(cpu, apicid); + return cpu; } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a27941ac528c..51122f0ba3f6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -102,6 +102,9 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map); DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +/* CPUs which are the primary SMT threads */ +struct cpumask __cpu_primary_thread_mask __read_mostly; + /* Representing CPUs for which sibling maps can be computed */ static cpumask_var_t cpu_sibling_setup_mask; @@ -276,15 +279,6 @@ static void notrace start_secondary(void *unused) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } -/** - * topology_is_primary_thread - Check whether CPU is the primary SMT thread - * @cpu: CPU to check - */ -bool topology_is_primary_thread(unsigned int cpu) -{ - return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu)); -} - /** * topology_smt_supported - Check whether SMT is supported by the CPUs */ -- cgit From 18415f33e2ac4ab382cbca8b5ff82a9036b5bd49 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:50 +0200 Subject: cpu/hotplug: Allow "parallel" bringup up to CPUHP_BP_KICK_AP_STATE There is often significant latency in the early stages of CPU bringup, and time is wasted by waking each CPU (e.g. with INIT/SIPI/SIPI on x86) and then waiting for it to respond before moving on to the next. Allow a platform to enable parallel setup which brings all to-be-onlined CPUs up to the CPUHP_BP_KICK_AP state. While this state advancement on the control CPU (BP) is single-threaded, the important part is the last state CPUHP_BP_KICK_AP which wakes the to-be-onlined CPUs up. This allows the CPUs to run up to the first synchronization point cpuhp_ap_sync_alive() where they wait for the control CPU to release them one by one for the full onlining procedure. This parallelism depends on the CPU hotplug core sync mechanism which ensures that the CPUs brought up in parallel wait for release before touching any state which would make the CPU visible to anything outside the hotplug control mechanism. To handle the SMT constraints of X86 correctly the bringup happens in two iterations when CONFIG_HOTPLUG_SMT is enabled. The control CPU brings up the primary SMT threads of each core first, which can load the microcode without the need to rendezvous with the thread siblings. Once that's completed it brings up the secondary SMT threads. Co-developed-by: David Woodhouse Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205257.240231377@linutronix.de --- arch/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index d3015a61c148..64d771855ecd 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -53,6 +53,10 @@ config HOTPLUG_SPLIT_STARTUP bool select HOTPLUG_CORE_SYNC_FULL +config HOTPLUG_PARALLEL + bool + select HOTPLUG_SPLIT_STARTUP + config GENERIC_ENTRY bool -- cgit
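On x86 the two-iteration scheme described above ends up looking roughly like the following on the control CPU. This is a sketch only; cpu_primary_thread_mask is real (see the earlier commit), while cpu_up_to() stands in for the core's state-advancement helper:

	unsigned int cpu;

	/* Iteration 1: kick the primary SMT thread of each core up to
	 * CPUHP_BP_KICK_AP; they can load microcode without their siblings. */
	for_each_present_cpu(cpu) {
		if (cpumask_test_cpu(cpu, cpu_primary_thread_mask))
			cpu_up_to(cpu, CPUHP_BP_KICK_AP);	/* hypothetical */
	}

	/* Iteration 2: kick the secondary SMT threads the same way. */
	for_each_present_cpu(cpu) {
		if (!cpumask_test_cpu(cpu, cpu_primary_thread_mask))
			cpu_up_to(cpu, CPUHP_BP_KICK_AP);
	}

	/* Finally, release and online each waiting CPU one by one. */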
From bea629d57d006733d155bdb65ba4867788da69b6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:51 +0200 Subject: x86/apic: Save the APIC virtual base address For parallel CPU bringup it's required to read the APIC ID in the low level startup code. The virtual APIC base address is a constant because it's a fix-mapped address. Exposing that constant, which is composed via macros, to assembly code is non-trivial due to header inclusion hell. Aside from that, it's constant only because of the vsyscall ABI requirement. Once vsyscall is out of the picture the fixmap can be placed at runtime. Avoid header hell, stay flexible and store the address in a variable which can be exposed to the low level startup code. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205257.299231005@linutronix.de --- arch/x86/include/asm/smp.h | 1 + arch/x86/kernel/apic/apic.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 726c2a243eb0..c6d5b65b7a47 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -196,6 +196,7 @@ extern void nmi_selftest(void); #endif extern unsigned int smpboot_control; +extern unsigned long apic_mmio_base; #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index e17600d3e285..d3f6c18cd3ec 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -101,6 +101,9 @@ static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP; */ static bool virt_ext_dest_id __ro_after_init; +/* For parallel bootup. */ +unsigned long apic_mmio_base __ro_after_init; + /* * Map cpu index to physical APIC ID */ @@ -2163,6 +2166,7 @@ void __init register_lapic_address(unsigned long address) if (!x2apic_mode) { set_fixmap_nocache(FIX_APIC_BASE, address); + apic_mmio_base = APIC_BASE; apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", APIC_BASE, address); } -- cgit From f6f1ae9128d2a080ecdd55f85e8a0ca3ed1d58eb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:53 +0200 Subject: x86/smpboot: Implement a bit spinlock to protect the realmode stack Parallel AP bringup requires that the APs can run fully parallel through the early startup code including the real mode trampoline. To prepare for this, implement a bit spinlock to serialize access to the real mode stack so that parallel upcoming APs are not going to corrupt each other's stack while going through the real mode startup code. Co-developed-by: David Woodhouse Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G.
Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205257.355425551@linutronix.de --- arch/x86/include/asm/realmode.h | 3 +++ arch/x86/kernel/head_64.S | 12 ++++++++++++ arch/x86/realmode/init.c | 3 +++ arch/x86/realmode/rm/trampoline_64.S | 23 ++++++++++++++++++----- 4 files changed, 36 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index f6a1737c77be..87e5482acd0d 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -52,6 +52,7 @@ struct trampoline_header { u64 efer; u32 cr4; u32 flags; + u32 lock; #endif }; @@ -64,6 +65,8 @@ extern unsigned long initial_stack; extern unsigned long initial_vc_handler; #endif +extern u32 *trampoline_lock; + extern unsigned char real_mode_blob[]; extern unsigned char real_mode_relocs[]; diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 8458033bb9f1..f99e9ab6bd26 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -251,6 +251,16 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) movq pcpu_hot + X86_current_task(%rdx), %rax movq TASK_threadsp(%rax), %rsp + /* + * Now that this CPU is running on its own stack, drop the realmode + * protection. For the boot CPU the pointer is NULL! + */ + movq trampoline_lock(%rip), %rax + testq %rax, %rax + jz .Lsetup_gdt + movl $0, (%rax) + +.Lsetup_gdt: /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace @@ -433,6 +443,8 @@ SYM_DATA(initial_code, .quad x86_64_start_kernel) #ifdef CONFIG_AMD_MEM_ENCRYPT SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb) #endif + +SYM_DATA(trampoline_lock, .quad 0); __FINITDATA __INIT diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index af565816d2ba..788e5559549f 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -154,6 +154,9 @@ static void __init setup_real_mode(void) trampoline_header->flags = 0; + trampoline_lock = &trampoline_header->lock; + *trampoline_lock = 0; + trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); /* Map the real mode stub as virtual == physical */ diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index e38d61d6562e..4822ad2a5e89 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -37,6 +37,20 @@ .text .code16 +.macro LOAD_REALMODE_ESP + /* + * Make sure only one CPU fiddles with the realmode stack + */ +.Llock_rm\@: + lock btsl $0, tr_lock + jnc 2f + pause + jmp .Llock_rm\@ +2: + # Setup stack + movl $rm_stack_end, %esp +.endm + .balign PAGE_SIZE SYM_CODE_START(trampoline_start) cli # We should be safe anyway @@ -49,8 +63,7 @@ SYM_CODE_START(trampoline_start) mov %ax, %es mov %ax, %ss - # Setup stack - movl $rm_stack_end, %esp + LOAD_REALMODE_ESP call verify_cpu # Verify the cpu supports long mode testl %eax, %eax # Check for return code @@ -93,8 +106,7 @@ SYM_CODE_START(sev_es_trampoline_start) mov %ax, %es mov %ax, %ss - # Setup stack - movl $rm_stack_end, %esp + LOAD_REALMODE_ESP jmp .Lswitch_to_protected SYM_CODE_END(sev_es_trampoline_start) @@ -177,7 +189,7 @@ SYM_CODE_START(pa_trampoline_compat) * In compatibility mode. Prep ESP and DX for startup_32, then disable * paging and complete the switch to legacy 32-bit mode. 
*/ - movl $rm_stack_end, %esp + LOAD_REALMODE_ESP movw $__KERNEL_DS, %dx movl $(CR0_STATE & ~X86_CR0_PG), %eax @@ -241,6 +253,7 @@ SYM_DATA_START(trampoline_header) SYM_DATA(tr_efer, .space 8) SYM_DATA(tr_cr4, .space 4) SYM_DATA(tr_flags, .space 4) + SYM_DATA(tr_lock, .space 4) SYM_DATA_END(trampoline_header) #include "trampoline_common.S" -- cgit From 7e75178a0950c5ceffa2ca3225701b69752f7d3a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 12 May 2023 23:07:55 +0200 Subject: x86/smpboot: Support parallel startup of secondary CPUs In parallel startup mode the APs are kicked alive by the control CPU quickly after each other and run through the early startup code in parallel. The real-mode startup code is already serialized with a bit-spinlock to protect the real-mode stack. In parallel startup mode the smpboot_control variable obviously cannot contain the Linux CPU number so the APs have to determine their Linux CPU number on their own. This is required to find the CPU's per-CPU offset in order to find the idle task stack and other per CPU data. To achieve this, export the cpuid_to_apicid[] array so that each AP can find its own CPU number by searching therein based on its APIC ID. Introduce a flag in the top bits of smpboot_control which indicates that the AP should find its CPU number by reading the APIC ID from the APIC. This is required because CPUID based APIC ID retrieval can only provide the initial APIC ID, which might have been overruled by the firmware. Some AMD APUs come up with APIC ID = initial APIC ID + 0x10, so the APIC ID to CPU number lookup would fail miserably if based on CPUID. Also virtualization can make its own APIC ID assignments. The only requirement is that the APIC IDs are consistent with the ACPI/MADT table. For the boot CPU or in case parallel bringup is disabled the control bits are empty and the CPU number is directly available in bit 0-23 of smpboot_control. [ tglx: Initial proof of concept patch with bitlock and APIC ID lookup ] [ dwmw2: Rework and testing, commit message, CPUID 0x1 and CPU0 support ] [ seanc: Fix stray override of initial_gs in common_cpu_up() ] [ Oleksandr Natalenko: reported suspend/resume issue fixed in x86_acpi_suspend_lowlevel ] [ tglx: Make it read the APIC ID from the APIC instead of using CPUID, split the bitlock part out ] Co-developed-by: Thomas Gleixner Co-developed-by: Brian Gerst Signed-off-by: Thomas Gleixner Signed-off-by: Brian Gerst Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G.
Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205257.411554373@linutronix.de --- arch/x86/include/asm/apic.h | 2 ++ arch/x86/include/asm/apicdef.h | 5 +++- arch/x86/include/asm/smp.h | 6 +++++ arch/x86/kernel/acpi/sleep.c | 9 ++++++- arch/x86/kernel/apic/apic.c | 2 +- arch/x86/kernel/head_64.S | 61 ++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smpboot.c | 2 +- 7 files changed, 83 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 030f5fb1daaf..98c32aa5963a 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -55,6 +55,8 @@ extern int local_apic_timer_c2_ok; extern int disable_apic; extern unsigned int lapic_timer_period; +extern int cpuid_to_apicid[]; + extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { APIC_PIC, diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 68d213e83fcc..bf546dfb6e58 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -138,7 +138,8 @@ #define APIC_EILVT_MASKED (1 << 16) #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) -#define APIC_BASE_MSR 0x800 +#define APIC_BASE_MSR 0x800 +#define APIC_X2APIC_ID_MSR 0x802 #define XAPIC_ENABLE (1UL << 11) #define X2APIC_ENABLE (1UL << 10) @@ -162,6 +163,7 @@ #define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK) #define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT) +#ifndef __ASSEMBLY__ /* * the local APIC register structure, memory mapped. Not terribly well * tested, but we might eventually use this one in the future - the @@ -435,4 +437,5 @@ enum apic_delivery_modes { APIC_DELIVERY_MODE_EXTINT = 7, }; +#endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_APICDEF_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c6d5b65b7a47..42060775a3d0 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -200,4 +200,10 @@ extern unsigned long apic_mmio_base; #endif /* !__ASSEMBLY__ */ +/* Control bits for startup_64 */ +#define STARTUP_READ_APICID 0x80000000 + +/* Top 8 bits are reserved for control */ +#define STARTUP_PARALLEL_MASK 0xFF000000 + #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 1328c221af30..6dfecb27b846 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "../../realmode/rm/wakeup.h" @@ -127,7 +128,13 @@ int x86_acpi_suspend_lowlevel(void) * value is in the actual %rsp register. */ current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack); - smpboot_control = smp_processor_id(); + /* + * Ensure the CPU knows which one it is when it comes back, if + * it isn't in parallel mode and expected to work that out for + * itself. + */ + if (!(smpboot_control & STARTUP_PARALLEL_MASK)) + smpboot_control = smp_processor_id(); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0L; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d3f6c18cd3ec..209c5052556d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2380,7 +2380,7 @@ static int nr_logical_cpuids = 1; /* * Used to store mapping between logical CPU IDs and APIC IDs. */ -static int cpuid_to_apicid[] = { +int cpuid_to_apicid[] = { [0 ... 
NR_CPUS - 1] = -1, }; diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index f99e9ab6bd26..9cd77d319555 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -24,7 +24,9 @@ #include "../entry/calling.h" #include #include +#include #include +#include /* * We are not able to switch in one step to the final KERNEL ADDRESS SPACE @@ -234,8 +236,67 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) ANNOTATE_NOENDBR // above #ifdef CONFIG_SMP + /* + * For parallel boot, the APIC ID is read from the APIC, and then + * used to look up the CPU number. For booting a single CPU, the + * CPU number is encoded in smpboot_control. + * + * Bit 31 STARTUP_READ_APICID (Read APICID from APIC) + * Bit 0-23 CPU# if STARTUP_xx flags are not set + */ movl smpboot_control(%rip), %ecx + testl $STARTUP_READ_APICID, %ecx + jnz .Lread_apicid + /* + * No control bit set, single CPU bringup. CPU number is provided + * in bit 0-23. This is also the boot CPU case (CPU number 0). + */ + andl $(~STARTUP_PARALLEL_MASK), %ecx + jmp .Lsetup_cpu + +.Lread_apicid: + /* Check whether X2APIC mode is already enabled */ + mov $MSR_IA32_APICBASE, %ecx + rdmsr + testl $X2APIC_ENABLE, %eax + jnz .Lread_apicid_msr + + /* Read the APIC ID from the fix-mapped MMIO space. */ + movq apic_mmio_base(%rip), %rcx + addq $APIC_ID, %rcx + movl (%rcx), %eax + shr $24, %eax + jmp .Llookup_AP + +.Lread_apicid_msr: + mov $APIC_X2APIC_ID_MSR, %ecx + rdmsr + +.Llookup_AP: + /* EAX contains the APIC ID of the current CPU */ + xorq %rcx, %rcx + leaq cpuid_to_apicid(%rip), %rbx + +.Lfind_cpunr: + cmpl (%rbx,%rcx,4), %eax + jz .Lsetup_cpu + inc %ecx +#ifdef CONFIG_FORCE_NR_CPUS + cmpl $NR_CPUS, %ecx +#else + cmpl nr_cpu_ids(%rip), %ecx +#endif + jb .Lfind_cpunr + + /* APIC ID not found in the table. Drop the trampoline lock and bail. */ + movq trampoline_lock(%rip), %rax + movl $0, (%rax) + +1: cli + hlt + jmp 1b +.Lsetup_cpu: /* Get the per cpu offset for the given CPU# which is in ECX */ movq __per_cpu_offset(,%rcx,8), %rdx #else diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 51122f0ba3f6..4b97373d82c8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -996,7 +996,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) if (IS_ENABLED(CONFIG_X86_32)) { early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu); initial_stack = idle->thread.sp; - } else { + } else if (!(smpboot_control & STARTUP_PARALLEL_MASK)) { smpboot_control = cpu; } -- cgit
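In C terms, the .Lfind_cpunr loop in the assembly above amounts to the lookup sketched below. It is written in assembly in reality because no C environment exists at that point, so treat this purely as illustration; cpuid_to_apicid[] and nr_cpu_ids are the real symbols the asm references:

	/* apicid has been read either from the fix-mapped MMIO space or via
	 * APIC_X2APIC_ID_MSR, exactly as the asm does. */
	unsigned int cpu;

	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
		if (cpuid_to_apicid[cpu] == apicid)
			break;	/* CPU number found: use __per_cpu_offset[cpu] */
	}
	if (cpu >= nr_cpu_ids) {
		/* Unknown APIC ID: drop the trampoline lock and halt. */
	}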
From 0c7ffa32dbd6b09a87fea4ad1de8b27145dfd9a6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:56 +0200 Subject: x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it Implement the validation function which tells the core code whether parallel bringup is possible. The only condition for now is that the kernel does not run in an encrypted guest, as these will trap the RDMSR via #VC, which cannot be handled at that point in early startup. There was an earlier variant for AMD-SEV which used the GHCB protocol for retrieving the APIC ID via CPUID, but there is no guarantee that the initial APIC ID in CPUID is the same as the real APIC ID. There is no enforcement from the secure firmware and the hypervisor can assign APIC IDs as it sees fit as long as the ACPI/MADT table is consistent with that assignment. Unfortunately there is no RDMSR GHCB protocol at the moment, so enabling AMD-SEV guests for parallel startup needs some more thought. Intel-TDX provides a secure RDMSR hypercall, but supporting that is outside the scope of this change. Fix up announce_cpu() as e.g. on Hyper-V CPU1 is the secondary sibling of CPU0, which makes the @cpu == 1 logic in announce_cpu() fall apart. [ mikelley: Reported the announce_cpu() fallout ] Originally-by: David Woodhouse Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205257.467571745@linutronix.de --- arch/x86/Kconfig | 3 +- arch/x86/kernel/cpu/common.c | 6 +-- arch/x86/kernel/smpboot.c | 87 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 75 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c140a73b4295..953823fdf57b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -274,8 +274,9 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER select HAVE_GENERIC_VDSO + select HOTPLUG_PARALLEL if SMP && X86_64 select HOTPLUG_SMT if SMP - select HOTPLUG_SPLIT_STARTUP if SMP + select HOTPLUG_SPLIT_STARTUP if SMP && X86_32 select IRQ_FORCED_THREADING select NEED_PER_CPU_EMBED_FIRST_CHUNK select NEED_PER_CPU_PAGE_FIRST_CHUNK diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 640fd1802c72..7cc44ebead5a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2128,11 +2128,7 @@ static inline void setup_getcpu(int cpu) } #ifdef CONFIG_X86_64 -static inline void ucode_cpu_init(int cpu) -{ - if (cpu) - load_ucode_ap(); -} +static inline void ucode_cpu_init(int cpu) { } static inline void tss_setup_ist(struct tss_struct *tss) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4b97373d82c8..660709e94823 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -75,7 +76,7 @@ #include #include #include -#include +#include #include #include #include @@ -128,7 +129,6 @@ int arch_update_cpu_topology(void) return retval; } - static unsigned int smpboot_warm_reset_vector_count; static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) @@ -226,16 +226,43 @@ static void notrace start_secondary(void *unused) */ cr4_init(); -#ifdef CONFIG_X86_32 - /* switch away from the initial page table */ - load_cr3(swapper_pg_dir); - __flush_tlb_all(); -#endif + /* + * 32-bit specific. 64-bit reaches this code with the correct page + * table established. Yet another historical divergence. + */ + if (IS_ENABLED(CONFIG_X86_32)) { + /* switch away from the initial page table */ + load_cr3(swapper_pg_dir); + __flush_tlb_all(); + } + cpu_init_exception_handling(); /* - * Synchronization point with the hotplug core. Sets the - * synchronization state to ALIVE and waits for the control CPU to + * 32-bit systems load the microcode from the ASM startup code for + * historical reasons. + * + * On 64-bit systems load it before reaching the AP alive + * synchronization point below so it is not part of the full per + * CPU serialized bringup part when "parallel" bringup is enabled. + * + * That's even safe when hyperthreading is enabled in the CPU as + * the core code starts the primary threads first and leaves the + * secondary threads waiting for SIPI. Loading microcode on + * physical cores concurrently is a safe operation.
+ * + * This covers both the Intel specific issue that concurrent + * microcode loading on SMT siblings must be prohibited and the + * vendor independent issue that microcode loading which changes + * CPUID, MSRs etc. must be strictly serialized to maintain + * software state correctness. + */ + if (IS_ENABLED(CONFIG_X86_64)) + load_ucode_ap(); + + /* + * Synchronization point with the hotplug core. Sets this CPU's + * synchronization state to ALIVE and spin-waits for the control CPU to + * release this CPU for further bringup. */ cpuhp_ap_sync_alive(); @@ -918,9 +945,9 @@ static int wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_ei /* reduce the number of lines printed when booting a large cpu count system */ static void announce_cpu(int cpu, int apicid) { + static int width, node_width, first = 1; static int current_node = NUMA_NO_NODE; int node = early_cpu_to_node(cpu); - static int width, node_width; if (!width) width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */ @@ -928,10 +955,10 @@ static void announce_cpu(int cpu, int apicid) if (!node_width) node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */ - if (cpu == 1) - printk(KERN_INFO "x86: Booting SMP configuration:\n"); - if (system_state < SYSTEM_RUNNING) { + if (first) + pr_info("x86: Booting SMP configuration:\n"); + if (node != current_node) { if (current_node > (-1)) pr_cont("\n"); @@ -942,11 +969,11 @@ static void announce_cpu(int cpu, int apicid) } /* Add padding for the BSP */ - if (cpu == 1) + if (first) pr_cont("%*s", width + 1, " "); + first = 0; pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu); - } else pr_info("Booting Node %d Processor %d APIC 0x%x\n", node, cpu, apicid); @@ -1236,6 +1263,36 @@ void __init smp_prepare_cpus_common(void) set_cpu_sibling_map(0); } +#ifdef CONFIG_X86_64 +/* Establish whether parallel bringup can be supported. */ +bool __init arch_cpuhp_init_parallel_bringup(void) +{ + /* + * Encrypted guests require special handling. They enforce X2APIC + * mode but the RDMSR to read the APIC ID is intercepted and raises + * #VC or #VE which cannot be handled in the early startup code. + * + * AMD-SEV does not provide a RDMSR GHCB protocol so the early + * startup code cannot directly communicate with the secure + * firmware. The alternative solution to retrieve the APIC ID via + * CPUID(0xb), which is covered by the GHCB protocol, is not viable + * either because there is no enforcement of the CPUID(0xb) + * provided "initial" APIC ID to be the same as the real APIC ID. + * + * Intel-TDX has a secure RDMSR hypercall, but that needs to be + * implemented separately in the low level startup ASM code. + */ + if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) { + pr_info("Parallel CPU startup disabled due to guest state encryption\n"); + return false; + } + + smpboot_control = STARTUP_READ_APICID; + pr_debug("Parallel CPU startup enabled: 0x%08x\n", smpboot_control); + return true; +} +#endif + /* * Prepare for SMP bootup. * @max_cpus: configured maximum number of CPUs, It is a legacy parameter -- cgit From 6a4be6984595b164b6f281c5b242dbdf1c06d528 Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Mon, 22 May 2023 11:57:38 +0100 Subject: x86/apic: Fix use of X{,2}APIC_ENABLE in asm with older binutils "x86/smpboot: Support parallel startup of secondary CPUs" adds the first use of X2APIC_ENABLE in assembly, but older binutils don't tolerate the UL suffix. Switch to using BIT() instead. Fixes: 7e75178a0950 ("x86/smpboot: Support parallel startup of secondary CPUs") Reported-by: Jeffrey Hugo Signed-off-by: Andrew Cooper Signed-off-by: Thomas Gleixner Tested-by: Jeffrey Hugo Link: https://lore.kernel.org/r/20230522105738.2378364-1-andrew.cooper3@citrix.com --- arch/x86/include/asm/apicdef.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index bf546dfb6e58..4b125e5b3187 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_APICDEF_H #define _ASM_X86_APICDEF_H +#include + /* * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) * @@ -140,8 +142,8 @@ #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) #define APIC_BASE_MSR 0x800 #define APIC_X2APIC_ID_MSR 0x802 -#define XAPIC_ENABLE (1UL << 11) -#define X2APIC_ENABLE (1UL << 10) +#define XAPIC_ENABLE BIT(11) +#define X2APIC_ENABLE BIT(10) #ifdef CONFIG_X86_32 # define MAX_IO_APICS 64 -- cgit
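The reason BIT() sidesteps the problem, as far as one can tell from include/vdso/bits.h and include/uapi/linux/const.h (worth verifying against the tree; this is the editor's reading, not part of the patch):

	/* In C:            BIT(10) -> (UL(1) << 10) -> (1UL << 10)        */
	/* In __ASSEMBLY__: the _AC()/UL() wrappers drop the type suffix,
	 *                  so BIT(10) -> (1 << 10), which older GAS can
	 *                  parse, while a literal 1UL makes it choke.     */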
From 5da80b28bf25c3458c7beb23794ff53622ce7eb4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 May 2023 21:38:47 +0200 Subject: x86/smp: Initialize cpu_primary_thread_mask late Marking primary threads in the cpumask during early boot is only correct in certain configurations, but broken e.g. for the legacy hyperthreading detection. This is due to the complete mess in the CPUID evaluation code which only half-initializes smp_num_siblings during early init and fixes it up later when identify_boot_cpu() is invoked. So using smp_num_siblings before identify_boot_cpu() leads to incorrect results. Fixing the early CPU init code to provide the proper data is a larger scale surgery as the code has dependencies on data structures which are not initialized during early boot. Move the initialization of cpu_primary_thread_mask, which depends on smp_num_siblings being correct, to an early initcall so that it is set up correctly before SMP bringup. Fixes: f54d4434c281 ("x86/apic: Provide cpu_primary_thread mask") Reported-by: "Kirill A. Shutemov" Signed-off-by: Thomas Gleixner Tested-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/87sfbhlwp9.ffs@tglx --- arch/x86/kernel/apic/apic.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 209c5052556d..af49e24b46a4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2398,6 +2398,21 @@ static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) if (smp_num_siblings == 1 || !(apicid & mask)) cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); } + +/* + * Due to the utter mess of CPUID evaluation smp_num_siblings is not valid + * during early boot. Initialize the primary thread mask before SMP + * bringup. + */ +static int __init smp_init_primary_thread_mask(void) +{ + unsigned int cpu; + + for (cpu = 0; cpu < nr_logical_cpuids; cpu++) + cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]); + return 0; +} +early_initcall(smp_init_primary_thread_mask); #else static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } #endif @@ -2544,7 +2559,8 @@ int generic_processor_info(int apicid, int version) set_cpu_present(cpu, true); num_processors++; - cpu_mark_primary_thread(cpu, apicid); + if (system_state != SYSTEM_BOOTING) + cpu_mark_primary_thread(cpu, apicid); return cpu; } -- cgit
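For reference, the mask logic that this initcall now drives, worked through with an illustrative value (smp_num_siblings == 2) rather than anything taken from the patch:

	/* fls(2) - 1 == 1, so mask == 0x1: the low APIC ID bit selects the
	 * SMT sibling. APIC IDs 0x0, 0x2, 0x4, ... are primary threads;
	 * APIC IDs 0x1, 0x3, 0x5, ... are their siblings and stay unmarked. */
	u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
	bool primary = smp_num_siblings == 1 || !(apicid & mask);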
From 33e20b07bec4991c169e3c6ff28c2126583724fc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 May 2023 12:46:22 +0200 Subject: x86/realmode: Make stack lock work in trampoline_compat() The stack locking and stack assignment macro LOAD_REALMODE_ESP fails to work when invoked from the 64bit trampoline entry point: trampoline_start64 trampoline_compat LOAD_REALMODE_ESP <- lock Accessing tr_lock is only possible from 16bit mode. For the compat entry point this needs to be pa_tr_lock so that the required relocation entry is generated. Otherwise it locks the non-relocated address, which, aside from being wrong, is never cleared in secondary_startup_64(), causing all but the first CPU to get stuck on the lock. Make the macro take an argument lock_pa which defaults to 0 and rename it to LOCK_AND_LOAD_REALMODE_ESP to make it clear what this is about. Fixes: f6f1ae9128d2 ("x86/smpboot: Implement a bit spinlock to protect the realmode stack") Reported-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Tested-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/87h6rujdvl.ffs@tglx --- arch/x86/realmode/rm/trampoline_64.S | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index 4822ad2a5e89..c9f76fae902e 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -37,12 +37,16 @@ .text .code16 -.macro LOAD_REALMODE_ESP +.macro LOCK_AND_LOAD_REALMODE_ESP lock_pa=0 /* * Make sure only one CPU fiddles with the realmode stack */ .Llock_rm\@: + .if \lock_pa + lock btsl $0, pa_tr_lock + .else lock btsl $0, tr_lock + .endif jnc 2f pause jmp .Llock_rm\@ @@ -63,7 +67,7 @@ SYM_CODE_START(trampoline_start) mov %ax, %es mov %ax, %ss - LOAD_REALMODE_ESP + LOCK_AND_LOAD_REALMODE_ESP call verify_cpu # Verify the cpu supports long mode testl %eax, %eax # Check for return code @@ -106,7 +110,7 @@ SYM_CODE_START(sev_es_trampoline_start) mov %ax, %es mov %ax, %ss - LOAD_REALMODE_ESP + LOCK_AND_LOAD_REALMODE_ESP jmp .Lswitch_to_protected SYM_CODE_END(sev_es_trampoline_start) @@ -189,7 +193,7 @@ SYM_CODE_START(pa_trampoline_compat) * In compatibility mode. Prep ESP and DX for startup_32, then disable * paging and complete the switch to legacy 32-bit mode. */ - LOAD_REALMODE_ESP + LOCK_AND_LOAD_REALMODE_ESP lock_pa=1 movw $__KERNEL_DS, %dx movl $(CR0_STATE & ~X86_CR0_PG), %eax -- cgit From ff3cfcb0d46adc541283a507560f88b7d7114dbe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 31 May 2023 09:44:26 +0200 Subject: x86/smpboot: Fix the parallel bringup decision The decision to allow parallel bringup of secondary CPUs checks CC_ATTR_GUEST_STATE_ENCRYPT to detect encrypted guests. Those cannot use parallel bootup because accessing the local APIC is intercepted and raises a #VC or #VE, which cannot be handled at that point. The check works correctly, but only for AMD encrypted guests. TDX does not set that flag. As there is no real connection between CC attributes and the inability to support parallel bringup, replace this with a generic control flag in x86_cpuinit and let SEV-ES and TDX init code disable it. Fixes: 0c7ffa32dbd6 ("x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it") Reported-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Tested-by: Tom Lendacky Tested-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/87ilc9gd2d.ffs@tglx --- arch/x86/coco/tdx/tdx.c | 11 +++++++++++ arch/x86/include/asm/x86_init.h | 3 +++ arch/x86/kernel/smpboot.c | 19 ++----------------- arch/x86/kernel/x86_init.c | 1 + arch/x86/mm/mem_encrypt_amd.c | 15 +++++++++++++++ 5 files changed, 32 insertions(+), 17 deletions(-) (limited to 'arch')
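The opt-out pattern this introduces is minimal; a sketch mirroring the TDX and SEV-ES hunks below, where only the x86_cpuinit.parallel_bringup field is real and the surrounding init hook is hypothetical:

	void __init some_platform_early_init(void)	/* hypothetical hook */
	{
		/* The flag defaults to true (see the x86_init.c hunk below);
		 * platforms that cannot read the APIC ID in the low level
		 * startup path simply switch it off. */
		x86_cpuinit.parallel_bringup = false;
	}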
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index e146b599260f..27ce10c9ff61 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -871,5 +871,16 @@ void __init tdx_early_init(void) x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed; + /* + * TDX intercepts the RDMSR to read the X2APIC ID in the parallel + * bringup low level code. That raises #VE which cannot be handled + * there. + * + * Intel-TDX has a secure RDMSR hypercall, but that needs to be + * implemented separately in the low level startup ASM code. + * Until that is in place, disable parallel bringup for TDX. + */ + x86_cpuinit.parallel_bringup = false; + pr_info("Guest detected\n"); } diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 88085f369ff6..0bf4d735ff00 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -177,11 +177,14 @@ struct x86_init_ops { * struct x86_cpuinit_ops - platform specific cpu hotplug setups * @setup_percpu_clockev: set up the per cpu clock event device * @early_percpu_clock_init: early init of the per cpu clock event device + * @fixup_cpu_id: fixup function for cpuinfo_x86::phys_proc_id + * @parallel_bringup: Parallel bringup control */ struct x86_cpuinit_ops { void (*setup_percpu_clockev)(void); void (*early_percpu_clock_init)(void); void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); + bool parallel_bringup; }; struct timespec64; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 660709e94823..aaa876cfd9d9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1267,23 +1267,8 @@ void __init smp_prepare_cpus_common(void) /* Establish whether parallel bringup can be supported. */ bool __init arch_cpuhp_init_parallel_bringup(void) { - /* - * Encrypted guests require special handling. They enforce X2APIC - * mode but the RDMSR to read the APIC ID is intercepted and raises - * #VC or #VE which cannot be handled in the early startup code. - * - * AMD-SEV does not provide a RDMSR GHCB protocol so the early - * startup code cannot directly communicate with the secure - * firmware. The alternative solution to retrieve the APIC ID via - * CPUID(0xb), which is covered by the GHCB protocol, is not viable - * either because there is no enforcement of the CPUID(0xb) - * provided "initial" APIC ID to be the same as the real APIC ID. - * - * Intel-TDX has a secure RDMSR hypercall, but that needs to be - * implemented separately in the low level startup ASM code.
- */ - if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) { - pr_info("Parallel CPU startup disabled due to guest state encryption\n"); + if (!x86_cpuinit.parallel_bringup) { + pr_info("Parallel CPU startup disabled by the platform\n"); return false; } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index d82f4fa2f1bf..1da4baa34d1b 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -126,6 +126,7 @@ struct x86_init_ops x86_init __initdata = { struct x86_cpuinit_ops x86_cpuinit = { .early_percpu_clock_init = x86_init_noop, .setup_percpu_clockev = setup_secondary_APIC_clock, + .parallel_bringup = true, }; static void default_nmi_init(void) { }; diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index e0b51c09109f..4855e5f92970 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -501,6 +501,21 @@ void __init sme_early_init(void) x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish; x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required; x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required; + + /* + * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the + * parallel bringup low level code. That raises #VC which cannot be + * handled there. + * It does not provide a RDMSR GHCB protocol so the early startup + * code cannot directly communicate with the secure firmware. The + * alternative solution to retrieve the APIC ID via CPUID(0xb), + * which is covered by the GHCB protocol, is not viable either + * because there is no enforcement of the CPUID(0xb) provided + * "initial" APIC ID to be the same as the real APIC ID. + * Disable parallel bootup. + */ + if (sev_status & MSR_AMD64_SEV_ES_ENABLED) + x86_cpuinit.parallel_bringup = false; } void __init mem_encrypt_free_decrypted_mem(void) -- cgit