diff options
Diffstat (limited to 'arch/x86/include/asm/mwait.h')
| -rw-r--r-- | arch/x86/include/asm/mwait.h | 105 |
1 files changed, 70 insertions, 35 deletions
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index bda3c27f0da0..e4815e15dc9a 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_MWAIT_H #define _ASM_X86_MWAIT_H @@ -5,43 +6,46 @@ #include <linux/sched/idle.h> #include <asm/cpufeature.h> +#include <asm/nospec-branch.h> #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf #define MWAIT_SUBSTATE_SIZE 4 #define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) #define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK) +#define MWAIT_C1_SUBSTATE_MASK 0xf0 -#define CPUID_MWAIT_LEAF 5 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1 #define CPUID5_ECX_INTERRUPT_BREAK 0x2 #define MWAIT_ECX_INTERRUPT_BREAK 0x1 #define MWAITX_ECX_TIMER_ENABLE BIT(1) -#define MWAITX_MAX_LOOPS ((u32)-1) -#define MWAITX_DISABLE_CSTATES 0xf +#define MWAITX_MAX_WAIT_CYCLES UINT_MAX +#define MWAITX_DISABLE_CSTATES 0xf0 +#define TPAUSE_C01_STATE 1 +#define TPAUSE_C02_STATE 0 -static inline void __monitor(const void *eax, unsigned long ecx, - unsigned long edx) +static __always_inline void __monitor(const void *eax, u32 ecx, u32 edx) { - /* "monitor %eax, %ecx, %edx;" */ - asm volatile(".byte 0x0f, 0x01, 0xc8;" - :: "a" (eax), "c" (ecx), "d"(edx)); + /* + * Use the instruction mnemonic with implicit operands, as the LLVM + * assembler fails to assemble the mnemonic with explicit operands: + */ + asm volatile("monitor" :: "a" (eax), "c" (ecx), "d" (edx)); } -static inline void __monitorx(const void *eax, unsigned long ecx, - unsigned long edx) +static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx) { - /* "monitorx %eax, %ecx, %edx;" */ - asm volatile(".byte 0x0f, 0x01, 0xfa;" - :: "a" (eax), "c" (ecx), "d"(edx)); + asm volatile("monitorx" :: "a" (eax), "c" (ecx), "d"(edx)); } -static inline void __mwait(unsigned long eax, unsigned long ecx) +static __always_inline void __mwait(u32 eax, u32 ecx) { - /* "mwait %eax, %ecx;" */ - asm volatile(".byte 0x0f, 0x01, 0xc9;" - :: "a" (eax), "c" (ecx)); + /* + * Use the instruction mnemonic with implicit operands, as the LLVM + * assembler fails to assemble the mnemonic with explicit operands: + */ + asm volatile("mwait" :: "a" (eax), "c" (ecx)); } /* @@ -70,20 +74,26 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) * EAX (logical) address to monitor * ECX #GP if not zero */ -static inline void __mwaitx(unsigned long eax, unsigned long ebx, - unsigned long ecx) +static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) { - /* "mwaitx %eax, %ebx, %ecx;" */ - asm volatile(".byte 0x0f, 0x01, 0xfb;" - :: "a" (eax), "b" (ebx), "c" (ecx)); + /* No need for TSA buffer clearing on AMD */ + + asm volatile("mwaitx" :: "a" (eax), "b" (ebx), "c" (ecx)); } -static inline void __sti_mwait(unsigned long eax, unsigned long ecx) +/* + * Re-enable interrupts right upon calling mwait in such a way that + * no interrupt can fire _before_ the execution of mwait, ie: no + * instruction must be placed between "sti" and "mwait". + * + * This is necessary because if an interrupt queues a timer before + * executing mwait, it would otherwise go unnoticed and the next tick + * would not be reprogrammed accordingly before mwait ever wakes up. + */ +static __always_inline void __sti_mwait(u32 eax, u32 ecx) { - trace_hardirqs_on(); - /* "mwait %eax, %ecx;" */ - asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" - :: "a" (eax), "c" (ecx)); + + asm volatile("sti; mwait" :: "a" (eax), "c" (ecx)); } /* @@ -96,20 +106,45 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) * New with Core Duo processors, MWAIT can take some hints based on CPU * capability. */ -static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) +static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { - if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) { - mb(); - clflush((void *)¤t_thread_info()->flags); - mb(); - } + const void *addr = ¤t_thread_info()->flags; + + alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); + __monitor(addr, 0, 0); - __monitor((void *)¤t_thread_info()->flags, 0, 0); - if (!need_resched()) + if (need_resched()) + goto out; + + if (ecx & 1) { __mwait(eax, ecx); + } else { + __sti_mwait(eax, ecx); + raw_local_irq_disable(); + } } + +out: current_clr_polling(); } +/* + * Caller can specify whether to enter C0.1 (low latency, less + * power saving) or C0.2 state (saves more power, but longer wakeup + * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR + * which can force requests for C0.2 to be downgraded to C0.1. + */ +static inline void __tpause(u32 ecx, u32 edx, u32 eax) +{ + /* "tpause %ecx" */ + asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1" + :: "c" (ecx), "d" (edx), "a" (eax)); +} + #endif /* _ASM_X86_MWAIT_H */ |
