From cec5f268cd02d25d2d74807843d8ae0292fe0fb7 Mon Sep 17 00:00:00 2001 From: Kyung Min Park Date: Fri, 24 Apr 2020 12:37:56 -0700 Subject: x86/delay: Introduce TPAUSE delay TPAUSE instructs the processor to enter an implementation-dependent optimized state. The instruction execution wakes up when the time-stamp counter reaches or exceeds the implicit EDX:EAX 64-bit input value. The instruction execution also wakes up due to the expiration of the operating system time-limit or by an external interrupt or exceptions such as a debug exception or a machine check exception. TPAUSE offers a choice of two lower power states: 1. Light-weight power/performance optimized state C0.1 2. Improved power/performance optimized state C0.2 This way, it can save power with low wake-up latency in comparison to spinloop based delay. The selection between the two is governed by the input register. TPAUSE is available on processors with X86_FEATURE_WAITPKG. Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Signed-off-by: Kyung Min Park Signed-off-by: Thomas Gleixner Reviewed-by: Tony Luck Link: https://lkml.kernel.org/r/1587757076-30337-4-git-send-email-kyung.min.park@intel.com --- arch/x86/Kconfig.assembler | 4 ++++ arch/x86/include/asm/delay.h | 1 + arch/x86/include/asm/mwait.h | 22 ++++++++++++++++++++++ arch/x86/kernel/time.c | 3 +++ arch/x86/lib/delay.c | 27 +++++++++++++++++++++++++++ 5 files changed, 57 insertions(+) diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index 13de0db38d4e..26b8c08e2fc4 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -15,3 +15,7 @@ config AS_SHA256_NI def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1) help Supported by binutils >= 2.24 and LLVM integrated assembler +config AS_TPAUSE + def_bool $(as-instr,tpause %ecx) + help + Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7 diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h index 9aa38de7bd72..630891d25819 100644 --- a/arch/x86/include/asm/delay.h +++ b/arch/x86/include/asm/delay.h @@ -6,6 +6,7 @@ #include void __init use_tsc_delay(void); +void __init use_tpause_delay(void); void use_mwaitx_delay(void); #endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index a43b35b35049..73d997aa2966 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -22,6 +22,8 @@ #define MWAITX_ECX_TIMER_ENABLE BIT(1) #define MWAITX_MAX_WAIT_CYCLES UINT_MAX #define MWAITX_DISABLE_CSTATES 0xf0 +#define TPAUSE_C01_STATE 1 +#define TPAUSE_C02_STATE 0 u32 get_umwait_control_msr(void); @@ -122,4 +124,24 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) current_clr_polling(); } +/* + * Caller can specify whether to enter C0.1 (low latency, less + * power saving) or C0.2 state (saves more power, but longer wakeup + * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR + * which can force requests for C0.2 to be downgraded to C0.1. + */ +static inline void __tpause(u32 ecx, u32 edx, u32 eax) +{ + /* "tpause %ecx, %edx, %eax;" */ + #ifdef CONFIG_AS_TPAUSE + asm volatile("tpause %%ecx\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); + #else + asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); + #endif +} + #endif /* _ASM_X86_MWAIT_H */ diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 106e7f87f534..371a6b348e44 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -103,6 +103,9 @@ static __init void x86_late_time_init(void) */ x86_init.irqs.intr_mode_init(); tsc_init(); + + if (static_cpu_has(X86_FEATURE_WAITPKG)) + use_tpause_delay(); } /* diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index fe91dc171cf8..65d15df6212d 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -96,6 +96,27 @@ static void delay_tsc(u64 cycles) preempt_enable(); } +/* + * On Intel the TPAUSE instruction waits until any of: + * 1) the TSC counter exceeds the value provided in EDX:EAX + * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded + * 3) an external interrupt occurs + */ +static void delay_halt_tpause(u64 start, u64 cycles) +{ + u64 until = start + cycles; + u32 eax, edx; + + eax = lower_32_bits(until); + edx = upper_32_bits(until); + + /* + * Hard code the deeper (C0.2) sleep state because exit latency is + * small compared to the "microseconds" that usleep() will delay. + */ + __tpause(TPAUSE_C02_STATE, edx, eax); +} + /* * On some AMD platforms, MWAITX has a configurable 32-bit timer, that * counts with TSC frequency. The input value is the number of TSC cycles @@ -156,6 +177,12 @@ void __init use_tsc_delay(void) delay_fn = delay_tsc; } +void __init use_tpause_delay(void) +{ + delay_halt_fn = delay_halt_tpause; + delay_fn = delay_halt; +} + void use_mwaitx_delay(void) { delay_halt_fn = delay_halt_mwaitx; -- cgit