diff options
Diffstat (limited to 'arch/x86/lib/delay.c')
| -rw-r--r-- | arch/x86/lib/delay.c | 141 |
1 files changed, 116 insertions, 25 deletions
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 7c3bee636e2f..eb2d2e1cbddd 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Precise Delay Loops for i386 * @@ -11,24 +12,35 @@ * we have to worry about. */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/timex.h> #include <linux/preempt.h> #include <linux/delay.h> -#include <linux/init.h> #include <asm/processor.h> #include <asm/delay.h> #include <asm/timer.h> +#include <asm/mwait.h> #ifdef CONFIG_SMP # include <asm/smp.h> #endif +static void delay_loop(u64 __loops); + +/* + * Calibration and selection of the delay mechanism happens only once + * during boot. + */ +static void (*delay_fn)(u64) __ro_after_init = delay_loop; +static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init; + /* simple loop based delay: */ -static void delay_loop(unsigned long loops) +static void delay_loop(u64 __loops) { + unsigned long loops = (unsigned long)__loops; + asm volatile( " test %0,%0 \n" " jz 3f \n" @@ -42,30 +54,28 @@ static void delay_loop(unsigned long loops) " jnz 2b \n" "3: dec %0 \n" - : /* we don't need output */ - :"a" (loops) + : "+a" (loops) + : ); } /* TSC based delay: */ -static void delay_tsc(unsigned long __loops) +static void delay_tsc(u64 cycles) { - u32 bclock, now, loops = __loops; + u64 bclock, now; int cpu; preempt_disable(); cpu = smp_processor_id(); - rdtsc_barrier(); - rdtscl(bclock); + bclock = rdtsc_ordered(); for (;;) { - rdtsc_barrier(); - rdtscl(now); - if ((now - bclock) >= loops) + now = rdtsc_ordered(); + if ((now - bclock) >= cycles) break; /* Allow RT tasks to run */ preempt_enable(); - rep_nop(); + native_pause(); preempt_disable(); /* @@ -78,30 +88,111 @@ static void delay_tsc(unsigned long __loops) * counter for this CPU. */ if (unlikely(cpu != smp_processor_id())) { - loops -= (now - bclock); + cycles -= (now - bclock); cpu = smp_processor_id(); - rdtsc_barrier(); - rdtscl(bclock); + bclock = rdtsc_ordered(); } } preempt_enable(); } /* - * Since we calibrate only once at boot, this - * function should be set once at boot and not changed + * On Intel the TPAUSE instruction waits until any of: + * 1) the TSC counter exceeds the value provided in EDX:EAX + * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded + * 3) an external interrupt occurs + */ +static void delay_halt_tpause(u64 start, u64 cycles) +{ + u64 until = start + cycles; + u32 eax, edx; + + eax = lower_32_bits(until); + edx = upper_32_bits(until); + + /* + * Hard code the deeper (C0.2) sleep state because exit latency is + * small compared to the "microseconds" that usleep() will delay. + */ + __tpause(TPAUSE_C02_STATE, edx, eax); +} + +/* + * On some AMD platforms, MWAITX has a configurable 32-bit timer, that + * counts with TSC frequency. The input value is the number of TSC cycles + * to wait. MWAITX will also exit when the timer expires. + */ +static void delay_halt_mwaitx(u64 unused, u64 cycles) +{ + u64 delay; + + delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles); + /* + * Use cpu_tss_rw as a cacheline-aligned, seldom accessed per-cpu + * variable as the monitor target. + */ + __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0); + + /* + * AMD, like Intel, supports the EAX hint and EAX=0xf means, do not + * enter any deep C-state and we use it here in delay() to minimize + * wakeup latency. + */ + __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE); +} + +/* + * Call a vendor specific function to delay for a given amount of time. Because + * these functions may return earlier than requested, check for actual elapsed + * time and call again until done. */ -static void (*delay_fn)(unsigned long) = delay_loop; +static void delay_halt(u64 __cycles) +{ + u64 start, end, cycles = __cycles; + + /* + * Timer value of 0 causes MWAITX to wait indefinitely, unless there + * is a store on the memory monitored by MONITORX. + */ + if (!cycles) + return; + + start = rdtsc_ordered(); + + for (;;) { + delay_halt_fn(start, cycles); + end = rdtsc_ordered(); + + if (cycles <= end - start) + break; + + cycles -= end - start; + start = end; + } +} + +void __init use_tsc_delay(void) +{ + if (delay_fn == delay_loop) + delay_fn = delay_tsc; +} + +void __init use_tpause_delay(void) +{ + delay_halt_fn = delay_halt_tpause; + delay_fn = delay_halt; +} -void use_tsc_delay(void) +void use_mwaitx_delay(void) { - delay_fn = delay_tsc; + delay_halt_fn = delay_halt_mwaitx; + delay_fn = delay_halt; } int read_current_timer(unsigned long *timer_val) { if (delay_fn == delay_tsc) { - rdtscll(*timer_val); + *timer_val = rdtsc(); return 0; } return -1; @@ -113,15 +204,15 @@ void __delay(unsigned long loops) } EXPORT_SYMBOL(__delay); -inline void __const_udelay(unsigned long xloops) +noinline void __const_udelay(unsigned long xloops) { + unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy; int d0; xloops *= 4; asm("mull %%edx" :"=d" (xloops), "=&a" (d0) - :"1" (xloops), "0" - (this_cpu_read(cpu_info.loops_per_jiffy) * (HZ/4))); + :"1" (xloops), "0" (lpj * (HZ / 4))); __delay(++xloops); } |
