summaryrefslogtreecommitdiff
path: root/arch/x86/lib/delay.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/lib/delay.c')
-rw-r--r--arch/x86/lib/delay.c141
1 files changed, 116 insertions, 25 deletions
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 7c3bee636e2f..eb2d2e1cbddd 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Precise Delay Loops for i386
*
@@ -11,24 +12,35 @@
* we have to worry about.
*/
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>
-#include <linux/init.h>
#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
+#include <asm/mwait.h>
#ifdef CONFIG_SMP
# include <asm/smp.h>
#endif
+static void delay_loop(u64 __loops);
+
+/*
+ * Calibration and selection of the delay mechanism happens only once
+ * during boot.
+ */
+static void (*delay_fn)(u64) __ro_after_init = delay_loop;
+static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;
+
/* simple loop based delay: */
-static void delay_loop(unsigned long loops)
+static void delay_loop(u64 __loops)
{
+ unsigned long loops = (unsigned long)__loops;
+
asm volatile(
" test %0,%0 \n"
" jz 3f \n"
@@ -42,30 +54,28 @@ static void delay_loop(unsigned long loops)
" jnz 2b \n"
"3: dec %0 \n"
- : /* we don't need output */
- :"a" (loops)
+ : "+a" (loops)
+ :
);
}
/* TSC based delay: */
-static void delay_tsc(unsigned long __loops)
+static void delay_tsc(u64 cycles)
{
- u32 bclock, now, loops = __loops;
+ u64 bclock, now;
int cpu;
preempt_disable();
cpu = smp_processor_id();
- rdtsc_barrier();
- rdtscl(bclock);
+ bclock = rdtsc_ordered();
for (;;) {
- rdtsc_barrier();
- rdtscl(now);
- if ((now - bclock) >= loops)
+ now = rdtsc_ordered();
+ if ((now - bclock) >= cycles)
break;
/* Allow RT tasks to run */
preempt_enable();
- rep_nop();
+ native_pause();
preempt_disable();
/*
@@ -78,30 +88,111 @@ static void delay_tsc(unsigned long __loops)
* counter for this CPU.
*/
if (unlikely(cpu != smp_processor_id())) {
- loops -= (now - bclock);
+ cycles -= (now - bclock);
cpu = smp_processor_id();
- rdtsc_barrier();
- rdtscl(bclock);
+ bclock = rdtsc_ordered();
}
}
preempt_enable();
}
/*
- * Since we calibrate only once at boot, this
- * function should be set once at boot and not changed
+ * On Intel the TPAUSE instruction waits until any of:
+ * 1) the TSC counter exceeds the value provided in EDX:EAX
+ * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
+ * 3) an external interrupt occurs
+ */
+static void delay_halt_tpause(u64 start, u64 cycles)
+{
+ u64 until = start + cycles;
+ u32 eax, edx;
+
+ eax = lower_32_bits(until);
+ edx = upper_32_bits(until);
+
+ /*
+ * Hard code the deeper (C0.2) sleep state because exit latency is
+ * small compared to the "microseconds" that usleep() will delay.
+ */
+ __tpause(TPAUSE_C02_STATE, edx, eax);
+}
+
+/*
+ * On some AMD platforms, MWAITX has a configurable 32-bit timer, that
+ * counts with TSC frequency. The input value is the number of TSC cycles
+ * to wait. MWAITX will also exit when the timer expires.
+ */
+static void delay_halt_mwaitx(u64 unused, u64 cycles)
+{
+ u64 delay;
+
+ delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
+ /*
+ * Use cpu_tss_rw as a cacheline-aligned, seldom accessed per-cpu
+ * variable as the monitor target.
+ */
+ __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
+
+ /*
+ * AMD, like Intel, supports the EAX hint and EAX=0xf means, do not
+ * enter any deep C-state and we use it here in delay() to minimize
+ * wakeup latency.
+ */
+ __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
+}
+
+/*
+ * Call a vendor specific function to delay for a given amount of time. Because
+ * these functions may return earlier than requested, check for actual elapsed
+ * time and call again until done.
*/
-static void (*delay_fn)(unsigned long) = delay_loop;
+static void delay_halt(u64 __cycles)
+{
+ u64 start, end, cycles = __cycles;
+
+ /*
+ * Timer value of 0 causes MWAITX to wait indefinitely, unless there
+ * is a store on the memory monitored by MONITORX.
+ */
+ if (!cycles)
+ return;
+
+ start = rdtsc_ordered();
+
+ for (;;) {
+ delay_halt_fn(start, cycles);
+ end = rdtsc_ordered();
+
+ if (cycles <= end - start)
+ break;
+
+ cycles -= end - start;
+ start = end;
+ }
+}
+
+void __init use_tsc_delay(void)
+{
+ if (delay_fn == delay_loop)
+ delay_fn = delay_tsc;
+}
+
+void __init use_tpause_delay(void)
+{
+ delay_halt_fn = delay_halt_tpause;
+ delay_fn = delay_halt;
+}
-void use_tsc_delay(void)
+void use_mwaitx_delay(void)
{
- delay_fn = delay_tsc;
+ delay_halt_fn = delay_halt_mwaitx;
+ delay_fn = delay_halt;
}
int read_current_timer(unsigned long *timer_val)
{
if (delay_fn == delay_tsc) {
- rdtscll(*timer_val);
+ *timer_val = rdtsc();
return 0;
}
return -1;
@@ -113,15 +204,15 @@ void __delay(unsigned long loops)
}
EXPORT_SYMBOL(__delay);
-inline void __const_udelay(unsigned long xloops)
+noinline void __const_udelay(unsigned long xloops)
{
+ unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
int d0;
xloops *= 4;
asm("mull %%edx"
:"=d" (xloops), "=&a" (d0)
- :"1" (xloops), "0"
- (this_cpu_read(cpu_info.loops_per_jiffy) * (HZ/4)));
+ :"1" (xloops), "0" (lpj * (HZ / 4)));
__delay(++xloops);
}