author     Linus Torvalds <torvalds@linux-foundation.org>  2024-03-11 19:53:15 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2024-03-11 19:53:15 -0700
commit     685d98211273f60e38a6d361b62d7016c545297e (patch)
tree       76c3be7af88578437c72325c322a52a12be3d05d  /arch/x86/kernel/process.c
parent     fcc196579aa1fc167d6778948bff69fae6116737 (diff)
parent     35ce64922c8263448e58a2b9e8d15a64e11e9b2d (diff)
Merge tag 'x86-core-2024-03-11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core x86 updates from Ingo Molnar:

 - The biggest change is the rework of the percpu code, to support the
   'Named Address Spaces' GCC feature, by Uros Bizjak (a standalone
   sketch of the feature follows the commit list below):

     - This allows C code to access GS and FS segment relative memory
       via variables declared with such attributes, which allows the
       compiler to better optimize those accesses than the previous
       inline assembly code.

     - The series also includes a number of micro-optimizations for
       various percpu access methods, plus a number of cleanups of %gs
       accesses in assembly code.

     - These changes have been exposed to linux-next testing for the
       last ~5 months, with no known regressions in this area.

 - Fix/clean up __switch_to()'s broken but accidentally working handling
   of FPU switching - which also generates better code

 - Propagate more RIP-relative addressing in assembly code, to generate
   slightly better code

 - Rework the CPU mitigations Kconfig space to be less idiosyncratic,
   to make it easier for distros to follow & maintain these options

 - Rework the x86 idle code to cure RCU violations and to clean up the
   logic

 - Clean up the vDSO Makefile logic

 - Misc cleanups and fixes

* tag 'x86-core-2024-03-11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (52 commits)
  x86/idle: Select idle routine only once
  x86/idle: Let prefer_mwait_c1_over_halt() return bool
  x86/idle: Cleanup idle_setup()
  x86/idle: Clean up idle selection
  x86/idle: Sanitize X86_BUG_AMD_E400 handling
  sched/idle: Conditionally handle tick broadcast in default_idle_call()
  x86: Increase brk randomness entropy for 64-bit systems
  x86/vdso: Move vDSO to mmap region
  x86/vdso/kbuild: Group non-standard build attributes and primary object file rules together
  x86/vdso: Fix rethunk patching for vdso-image-{32,64}.o
  x86/retpoline: Ensure default return thunk isn't used at runtime
  x86/vdso: Use CONFIG_COMPAT_32 to specify vdso32
  x86/vdso: Use $(addprefix ) instead of $(foreach )
  x86/vdso: Simplify obj-y addition
  x86/vdso: Consolidate targets and clean-files
  x86/bugs: Rename CONFIG_RETHUNK => CONFIG_MITIGATION_RETHUNK
  x86/bugs: Rename CONFIG_CPU_SRSO => CONFIG_MITIGATION_SRSO
  x86/bugs: Rename CONFIG_CPU_IBRS_ENTRY => CONFIG_MITIGATION_IBRS_ENTRY
  x86/bugs: Rename CONFIG_CPU_UNRET_ENTRY => CONFIG_MITIGATION_UNRET_ENTRY
  x86/bugs: Rename CONFIG_SLS => CONFIG_MITIGATION_SLS
  ...
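For readers unfamiliar with the GCC feature named in the first item, here is a minimal, self-contained sketch of a %gs-relative access through the __seg_gs named address space qualifier. The function name is illustrative and not taken from the series; __SEG_GS is the macro GCC predefines on x86 targets where the feature is available.

#if defined(__SEG_GS)
/*
 * Dereferencing a __seg_gs-qualified pointer makes the compiler emit a
 * %gs-prefixed memory access directly, so the load is visible to the
 * optimizer (CSE, scheduling) rather than being opaque inline assembly.
 */
static inline unsigned long read_gs_word(const unsigned long __seg_gs *ptr)
{
	return *ptr;	/* e.g. mov %gs:(%rdi),%rax on x86-64 */
}
#endif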
Diffstat (limited to 'arch/x86/kernel/process.c')
-rw-r--r--  arch/x86/kernel/process.c  99
1 file changed, 36 insertions(+), 63 deletions(-)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6121c2b42ecf..b8441147eb5e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -846,31 +846,6 @@ void __noreturn stop_this_cpu(void *dummy)
}
/*
- * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
- * states (local apic timer and TSC stop).
- *
- * XXX this function is completely buggered vs RCU and tracing.
- */
-static void amd_e400_idle(void)
-{
- /*
- * We cannot use static_cpu_has_bug() here because X86_BUG_AMD_APIC_C1E
- * gets set after static_cpu_has() places have been converted via
- * alternatives.
- */
- if (!boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) {
- default_idle();
- return;
- }
-
- tick_broadcast_enter();
-
- default_idle();
-
- tick_broadcast_exit();
-}
-
-/*
* Prefer MWAIT over HALT if MWAIT is supported, MWAIT_CPUID leaf
* exists and whenever MONITOR/MWAIT extensions are present there is at
* least one C1 substate.
@@ -878,21 +853,22 @@ static void amd_e400_idle(void)
* Do not prefer MWAIT if MONITOR instruction has a bug or idle=nomwait
* is passed to kernel commandline parameter.
*/
-static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
+static __init bool prefer_mwait_c1_over_halt(void)
{
+ const struct cpuinfo_x86 *c = &boot_cpu_data;
u32 eax, ebx, ecx, edx;
- /* User has disallowed the use of MWAIT. Fallback to HALT */
- if (boot_option_idle_override == IDLE_NOMWAIT)
- return 0;
+ /* If override is enforced on the command line, fall back to HALT. */
+ if (boot_option_idle_override != IDLE_NO_OVERRIDE)
+ return false;
/* MWAIT is not supported on this platform. Fallback to HALT */
if (!cpu_has(c, X86_FEATURE_MWAIT))
- return 0;
+ return false;
- /* Monitor has a bug. Fallback to HALT */
- if (boot_cpu_has_bug(X86_BUG_MONITOR))
- return 0;
+ /* Monitor has a bug or APIC stops in C1E. Fallback to HALT */
+ if (boot_cpu_has_bug(X86_BUG_MONITOR) || boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E))
+ return false;
cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
@@ -901,13 +877,13 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
* with EAX=0, ECX=0.
*/
if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED))
- return 1;
+ return true;
/*
* If MWAIT extensions are available, there should be at least one
* MWAIT C1 substate present.
*/
- return (edx & MWAIT_C1_SUBSTATE_MASK);
+ return !!(edx & MWAIT_C1_SUBSTATE_MASK);
}
/*
@@ -933,26 +909,27 @@ static __cpuidle void mwait_idle(void)
__current_clr_polling();
}
-void select_idle_routine(const struct cpuinfo_x86 *c)
+void __init select_idle_routine(void)
{
-#ifdef CONFIG_SMP
- if (boot_option_idle_override == IDLE_POLL && __max_threads_per_core > 1)
- pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
-#endif
- if (x86_idle_set() || boot_option_idle_override == IDLE_POLL)
+ if (boot_option_idle_override == IDLE_POLL) {
+ if (IS_ENABLED(CONFIG_SMP) && __max_threads_per_core > 1)
+ pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
+ return;
+ }
+
+ /* Required to guard against xen_set_default_idle() */
+ if (x86_idle_set())
return;
- if (boot_cpu_has_bug(X86_BUG_AMD_E400)) {
- pr_info("using AMD E400 aware idle routine\n");
- static_call_update(x86_idle, amd_e400_idle);
- } else if (prefer_mwait_c1_over_halt(c)) {
+ if (prefer_mwait_c1_over_halt()) {
pr_info("using mwait in idle threads\n");
static_call_update(x86_idle, mwait_idle);
} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
pr_info("using TDX aware idle routine\n");
static_call_update(x86_idle, tdx_safe_halt);
- } else
+ } else {
static_call_update(x86_idle, default_idle);
+ }
}
void amd_e400_c1e_apic_setup(void)
@@ -985,7 +962,10 @@ void __init arch_post_acpi_subsys_init(void)
if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
mark_tsc_unstable("TSC halt in AMD C1E");
- pr_info("System has AMD C1E enabled\n");
+
+ if (IS_ENABLED(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST_IDLE))
+ static_branch_enable(&arch_needs_tick_broadcast);
+ pr_info("System has AMD C1E erratum E400. Workaround enabled.\n");
}
static int __init idle_setup(char *str)
@@ -998,24 +978,14 @@ static int __init idle_setup(char *str)
boot_option_idle_override = IDLE_POLL;
cpu_idle_poll_ctrl(true);
} else if (!strcmp(str, "halt")) {
- /*
- * When the boot option of idle=halt is added, halt is
- * forced to be used for CPU idle. In such case CPU C2/C3
- * won't be used again.
- * To continue to load the CPU idle driver, don't touch
- * the boot_option_idle_override.
- */
- static_call_update(x86_idle, default_idle);
+ /* 'idle=halt' HALT for idle. C-states are disabled. */
boot_option_idle_override = IDLE_HALT;
} else if (!strcmp(str, "nomwait")) {
- /*
- * If the boot option of "idle=nomwait" is added,
- * it means that mwait will be disabled for CPU C1/C2/C3
- * states.
- */
+ /* 'idle=nomwait' disables MWAIT for idle */
boot_option_idle_override = IDLE_NOMWAIT;
- } else
- return -1;
+ } else {
+ return -EINVAL;
+ }
return 0;
}
@@ -1030,7 +1000,10 @@ unsigned long arch_align_stack(unsigned long sp)
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
- return randomize_page(mm->brk, 0x02000000);
+ if (mmap_is_ia32())
+ return randomize_page(mm->brk, SZ_32M);
+
+ return randomize_page(mm->brk, SZ_1G);
}
/*
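As context for the final arch_randomize_brk() hunk: assuming randomize_page(start, range) returns a page-aligned address within [start, start + range), which matches its kernel-doc, the move from the bare 0x02000000 literal to SZ_32M/SZ_1G keeps ia32 tasks at 13 bits of brk entropy while raising 64-bit tasks to 18 bits with 4 KiB pages. A quick standalone check (plain userspace C; the SZ_* constants mirror include/linux/sizes.h):

#include <stdio.h>

#define PAGE_SHIFT 12           /* 4 KiB pages */
#define SZ_32M     0x02000000UL /* old range, kept for ia32 */
#define SZ_1G      0x40000000UL /* new range for 64-bit tasks */

int main(void)
{
	/* Number of page-aligned positions the brk base can land on */
	printf("ia32:   %lu slots (2^13, 13 bits)\n", SZ_32M >> PAGE_SHIFT);
	printf("64-bit: %lu slots (2^18, 18 bits)\n", SZ_1G  >> PAGE_SHIFT);
	return 0;
}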