From 545ac13892ab391049a92108cf59a0d05de7e28c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Fri, 9 Aug 2013 19:51:49 +0530
Subject: x86, spinlock: Replace pv spinlocks with pv ticketlocks

Rather than outright replacing the entire spinlock implementation in
order to paravirtualize it, keep the ticket lock implementation but add
a couple of pvops hooks on the slow path (long spin on lock, unlocking
a contended lock).

Ticket locks have a number of nice properties, but they also have some
surprising behaviours in virtual environments.  They enforce a strict
FIFO ordering on cpus trying to take a lock; however, if the hypervisor
scheduler does not schedule the cpus in the correct order, the system
can waste a huge amount of time spinning until the next cpu can take
the lock.

(See Thomas Friebel's talk "Prevent Guests from Spinning Around"
http://www.xen.org/files/xensummitboston08/LHP.pdf for more details.)

To address this, we add two hooks:

 - __ticket_spin_lock, which is called after the cpu has been spinning
   on the lock for a significant number of iterations but has failed to
   take the lock (presumably because the cpu holding the lock has been
   descheduled).  The lock_spinning pvop is expected to block the cpu
   until it has been kicked by the current lock holder.

 - __ticket_spin_unlock, which, on releasing a contended lock (there
   are more cpus with tail tickets), looks to see if the next cpu is
   blocked and wakes it if so.

When compiled with CONFIG_PARAVIRT_SPINLOCKS disabled, a set of stub
functions causes all the extra code to go away.

Results:
=======
setup: 32 core machine with 32 vcpu KVM guest (HT off) with 8GB RAM
base = 3.11-rc
patched = base + pvspinlock V12

+------------------+-------------------+--------+
  dbench (Throughput in MB/sec.  Higher is better)
+------------------+-------------------+--------+
|  base (stdev %)  | patched (stdev %) |  %gain |
+------------------+-------------------+--------+
| 15035.3 (0.3)    | 15150.0 (0.6)     |   0.8  |
|  1470.0 (2.2)    |  1713.7 (1.9)     |  16.6  |
|   848.6 (4.3)    |   967.8 (4.3)     |  14.0  |
|   652.9 (3.5)    |   685.3 (3.7)     |   5.0  |
+------------------+-------------------+--------+

pvspinlock shows benefits for overcommit ratio > 1 for PLE enabled
cases, and undercommit results are flat.

Signed-off-by: Jeremy Fitzhardinge
Link: http://lkml.kernel.org/r/1376058122-8248-2-git-send-email-raghavendra.kt@linux.vnet.ibm.com
Reviewed-by: Konrad Rzeszutek Wilk
Tested-by: Attilio Rao
[ Raghavendra: Changed SPIN_THRESHOLD, fixed redefinition of
  arch_spinlock_t ]
Signed-off-by: Raghavendra K T
Acked-by: Ingo Molnar
Signed-off-by: H. Peter Anvin
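To illustrate where these hooks sit, here is a condensed sketch of the
ticket-lock fast path as this series shapes it, adapted from the
arch/x86/include/asm/spinlock.h side of the series.  It is simplified,
not verbatim, and the real SPIN_THRESHOLD value differs:

        /*
         * Condensed sketch, not the verbatim kernel code: spin for up to
         * SPIN_THRESHOLD iterations, then fall into the lock_spinning pvop.
         */
        static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
        {
                register struct __raw_tickets inc = { .tail = 1 };

                /* atomically grab the next ticket and read the current head */
                inc = xadd(&lock->tickets, inc);

                for (;;) {
                        unsigned count = SPIN_THRESHOLD;

                        do {
                                if (inc.head == inc.tail)
                                        return;         /* our turn */
                                cpu_relax();
                                inc.head = ACCESS_ONCE(lock->tickets.head);
                        } while (--count);

                        /*
                         * Spun too long; the holder is probably descheduled.
                         * Block via the pvop until the unlocker kicks us.
                         */
                        __ticket_lock_spinning(lock, inc.tail);
                }
        }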
---
 arch/x86/xen/spinlock.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch/x86/xen')

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index cf3caee356b3..d50962936af4 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -139,6 +139,9 @@ struct xen_spinlock {
 	xen_spinners_t spinners;	/* count of waiting cpus */
 };

+static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
+
+#if 0
 static int xen_spin_is_locked(struct arch_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
@@ -167,7 +170,6 @@ static int xen_spin_trylock(struct arch_spinlock *lock)
 }

 static DEFINE_PER_CPU(char *, irq_name);
-static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
 static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);

 /*
@@ -354,6 +356,7 @@ static void xen_spin_unlock(struct arch_spinlock *lock)
 	if (unlikely(xl->spinners))
 		xen_spin_unlock_slow(xl);
 }
+#endif

 static irqreturn_t dummy_handler(int irq, void *dev_id)
 {
 	BUG();
@@ -418,13 +421,14 @@ void __init xen_init_spinlocks(void)
 		return;

 	BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
-
+#if 0
 	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
 	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
 	pv_lock_ops.spin_lock = xen_spin_lock;
 	pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
 	pv_lock_ops.spin_trylock = xen_spin_trylock;
 	pv_lock_ops.spin_unlock = xen_spin_unlock;
+#endif
 }

 #ifdef CONFIG_XEN_DEBUG_FS
--
cgit

From bf7aab3ad4b4364a293421d628a912a2153ee1ee Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Fri, 9 Aug 2013 19:51:52 +0530
Subject: xen: Defer spinlock setup until boot CPU setup

There's no need to do it at very early init, and doing it there makes
it impossible to use the jump_label machinery.

Signed-off-by: Jeremy Fitzhardinge
Link: http://lkml.kernel.org/r/1376058122-8248-5-git-send-email-raghavendra.kt@linux.vnet.ibm.com
Reviewed-by: Konrad Rzeszutek Wilk
Signed-off-by: Raghavendra K T
Acked-by: Ingo Molnar
Signed-off-by: H. Peter Anvin
---
 arch/x86/xen/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/xen')

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index ca92754eb846..3b52d8075e47 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -279,6 +279,7 @@ static void __init xen_smp_prepare_boot_cpu(void)
 	xen_filter_cpu_maps();
 	xen_setup_vcpu_info_placement();
+	xen_init_spinlocks();
 }

 static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
@@ -680,7 +681,6 @@ void __init xen_smp_init(void)
 {
 	smp_ops = xen_smp_ops;
 	xen_fill_possible_map();
-	xen_init_spinlocks();
 }

 static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
--
cgit

From 80bd58fef495d000a02fc5b55ca76d423400e748 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Fri, 9 Aug 2013 19:51:53 +0530
Subject: xen, pvticketlock: Xen implementation for PV ticket locks

Replace the old Xen implementation of PV spinlocks with an
implementation of xen_lock_spinning and xen_unlock_kick.

xen_lock_spinning simply registers the cpu in its entry in
lock_waiting, adds itself to the waiting_cpus set, and blocks on an
event channel until the channel becomes pending.

xen_unlock_kick searches the cpus in waiting_cpus looking for the one
which wants this lock with the next ticket, if any.  If found, it
kicks it by making its event channel pending, which wakes it up.
We need to make sure interrupts are disabled while we're relying on the
contents of the per-cpu lock_waiting values, otherwise an interrupt
handler could come in, try to take some other lock, block, and
overwrite our values.

Signed-off-by: Jeremy Fitzhardinge
Link: http://lkml.kernel.org/r/1376058122-8248-6-git-send-email-raghavendra.kt@linux.vnet.ibm.com
Reviewed-by: Konrad Rzeszutek Wilk
[ Raghavendra: use function + enum instead of macro, cmpxchg for zero
  status reset.  Reintroduce break since we know the exact vCPU to send
  the IPI, as suggested by Konrad. ]
Signed-off-by: Raghavendra K T
Acked-by: Ingo Molnar
Signed-off-by: H. Peter Anvin
---
 arch/x86/xen/spinlock.c | 348 +++++++++++------------------------------------
 1 file changed, 79 insertions(+), 269 deletions(-)

(limited to 'arch/x86/xen')

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index d50962936af4..a458729be25f 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,45 +17,44 @@
 #include "xen-ops.h"
 #include "debugfs.h"

-#ifdef CONFIG_XEN_DEBUG_FS
-static struct xen_spinlock_stats
-{
-	u64 taken;
-	u32 taken_slow;
-	u32 taken_slow_nested;
-	u32 taken_slow_pickup;
-	u32 taken_slow_spurious;
-	u32 taken_slow_irqenable;
+enum xen_contention_stat {
+	TAKEN_SLOW,
+	TAKEN_SLOW_PICKUP,
+	TAKEN_SLOW_SPURIOUS,
+	RELEASED_SLOW,
+	RELEASED_SLOW_KICKED,
+	NR_CONTENTION_STATS
+};

-	u64 released;
-	u32 released_slow;
-	u32 released_slow_kicked;
+#ifdef CONFIG_XEN_DEBUG_FS
 #define HISTO_BUCKETS	30
-	u32 histo_spin_total[HISTO_BUCKETS+1];
-	u32 histo_spin_spinning[HISTO_BUCKETS+1];
+static struct xen_spinlock_stats
+{
+	u32 contention_stats[NR_CONTENTION_STATS];
 	u32 histo_spin_blocked[HISTO_BUCKETS+1];
-
-	u64 time_total;
-	u64 time_spinning;
 	u64 time_blocked;
 } spinlock_stats;

 static u8 zero_stats;

-static unsigned lock_timeout = 1 << 10;
-#define TIMEOUT lock_timeout
-
 static inline void check_zero(void)
 {
-	if (unlikely(zero_stats)) {
-		memset(&spinlock_stats, 0, sizeof(spinlock_stats));
-		zero_stats = 0;
+	u8 ret;
+	u8 old = ACCESS_ONCE(zero_stats);
+	if (unlikely(old)) {
+		ret = cmpxchg(&zero_stats, old, 0);
+		/* This ensures only one fellow resets the stat */
+		if (ret == old)
+			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
 	}
 }

-#define ADD_STATS(elem, val)			\
-	do { check_zero(); spinlock_stats.elem += (val); } while(0)
+static inline void add_stats(enum xen_contention_stat var, u32 val)
+{
+	check_zero();
+	spinlock_stats.contention_stats[var] += val;
+}

 static inline u64 spin_time_start(void)
 {
@@ -74,22 +73,6 @@ static void __spin_time_accum(u64 delta, u32 *array)
 		array[HISTO_BUCKETS]++;
 }

-static inline void spin_time_accum_spinning(u64 start)
-{
-	u32 delta = xen_clocksource_read() - start;
-
-	__spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
-	spinlock_stats.time_spinning += delta;
-}
-
-static inline void spin_time_accum_total(u64 start)
-{
-	u32 delta = xen_clocksource_read() - start;
-
-	__spin_time_accum(delta, spinlock_stats.histo_spin_total);
-	spinlock_stats.time_total += delta;
-}
-
 static inline void spin_time_accum_blocked(u64 start)
 {
 	u32 delta = xen_clocksource_read() - start;
@@ -99,19 +82,15 @@ static inline void spin_time_accum_blocked(u64 start)
 }
 #else  /* !CONFIG_XEN_DEBUG_FS */
 #define TIMEOUT			(1 << 10)
-#define ADD_STATS(elem, val)	do { (void)(val); } while(0)
+static inline void add_stats(enum xen_contention_stat var, u32 val)
+{
+}

 static inline u64 spin_time_start(void)
 {
 	return 0;
 }

-static inline void spin_time_accum_total(u64 start)
-{
-}
-static inline void spin_time_accum_spinning(u64 start)
-{
-}
 static inline void spin_time_accum_blocked(u64 start)
 {
 }
@@ -134,230 +113,84 @@ typedef u16 xen_spinners_t;
 	asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory");
 #endif

-struct xen_spinlock {
-	unsigned char lock;		/* 0 -> free; 1 -> locked */
-	xen_spinners_t spinners;	/* count of waiting cpus */
+struct xen_lock_waiting {
+	struct arch_spinlock *lock;
+	__ticket_t want;
 };

 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
-
-#if 0
-static int xen_spin_is_locked(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	return xl->lock != 0;
-}
-
-static int xen_spin_is_contended(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	/* Not strictly true; this is only the count of contended
-	   lock-takers entering the slow path. */
-	return xl->spinners != 0;
-}
-
-static int xen_spin_trylock(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	u8 old = 1;
-
-	asm("xchgb %b0,%1"
-	    : "+q" (old), "+m" (xl->lock) : : "memory");
-
-	return old == 0;
-}
-
 static DEFINE_PER_CPU(char *, irq_name);
-static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
+static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
+static cpumask_t waiting_cpus;

-/*
- * Mark a cpu as interested in a lock.  Returns the CPU's previous
- * lock of interest, in case we got preempted by an interrupt.
- */
-static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
+static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 {
-	struct xen_spinlock *prev;
-
-	prev = __this_cpu_read(lock_spinners);
-	__this_cpu_write(lock_spinners, xl);
-
-	wmb();			/* set lock of interest before count */
-
-	inc_spinners(xl);
-
-	return prev;
-}
-
-/*
- * Mark a cpu as no longer interested in a lock.  Restores previous
- * lock of interest (NULL for none).
- */
-static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
-{
-	dec_spinners(xl);
-	wmb();			/* decrement count before restoring lock */
-	__this_cpu_write(lock_spinners, prev);
-}
-
-static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	struct xen_spinlock *prev;
 	int irq = __this_cpu_read(lock_kicker_irq);
-	int ret;
+	struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting);
+	int cpu = smp_processor_id();
 	u64 start;
+	unsigned long flags;

 	/* If kicker interrupts not initialized yet, just spin */
 	if (irq == -1)
-		return 0;
+		return;

 	start = spin_time_start();

-	/* announce we're spinning */
-	prev = spinning_lock(xl);
-
-	ADD_STATS(taken_slow, 1);
-	ADD_STATS(taken_slow_nested, prev != NULL);
-
-	do {
-		unsigned long flags;
-
-		/* clear pending */
-		xen_clear_irq_pending(irq);
-
-		/* check again make sure it didn't become free while
-		   we weren't looking  */
-		ret = xen_spin_trylock(lock);
-		if (ret) {
-			ADD_STATS(taken_slow_pickup, 1);
-
-			/*
-			 * If we interrupted another spinlock while it
-			 * was blocking, make sure it doesn't block
-			 * without rechecking the lock.
-			 */
-			if (prev != NULL)
-				xen_set_irq_pending(irq);
-			goto out;
-		}
+	/*
+	 * Make sure an interrupt handler can't upset things in a
	 * partially setup state.
+	 */
+	local_irq_save(flags);

-		flags = arch_local_save_flags();
-		if (irq_enable) {
-			ADD_STATS(taken_slow_irqenable, 1);
-			raw_local_irq_enable();
-		}
+	w->want = want;
+	smp_wmb();
+	w->lock = lock;

-		/*
-		 * Block until irq becomes pending.  If we're
-		 * interrupted at this point (after the trylock but
-		 * before entering the block), then the nested lock
-		 * handler guarantees that the irq will be left
-		 * pending if there's any chance the lock became free;
-		 * xen_poll_irq() returns immediately if the irq is
-		 * pending.
-		 */
-		xen_poll_irq(irq);
+	/* This uses set_bit, which is atomic and therefore a barrier */
+	cpumask_set_cpu(cpu, &waiting_cpus);
+	add_stats(TAKEN_SLOW, 1);

-		raw_local_irq_restore(flags);
+	/* clear pending */
+	xen_clear_irq_pending(irq);

-		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
-	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
+	/* Only check lock once pending cleared */
+	barrier();
+	/* check again make sure it didn't become free while
+	   we weren't looking */
+	if (ACCESS_ONCE(lock->tickets.head) == want) {
+		add_stats(TAKEN_SLOW_PICKUP, 1);
+		goto out;
+	}
+	/* Block until irq becomes pending (or perhaps a spurious wakeup) */
+	xen_poll_irq(irq);
+	add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));
 	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
-
 out:
-	unspinning_lock(xl, prev);
+	cpumask_clear_cpu(cpu, &waiting_cpus);
+	w->lock = NULL;
+	local_irq_restore(flags);
 	spin_time_accum_blocked(start);
-
-	return ret;
 }

-static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	unsigned timeout;
-	u8 oldval;
-	u64 start_spin;
-
-	ADD_STATS(taken, 1);
-
-	start_spin = spin_time_start();
-
-	do {
-		u64 start_spin_fast = spin_time_start();
-
-		timeout = TIMEOUT;
-
-		asm("1: xchgb %1,%0\n"
-		    "   testb %1,%1\n"
-		    "   jz 3f\n"
-		    "2: rep;nop\n"
-		    "   cmpb $0,%0\n"
-		    "   je 1b\n"
-		    "   dec %2\n"
-		    "   jnz 2b\n"
-		    "3:\n"
-		    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
-		    : "1" (1)
-		    : "memory");
-
-		spin_time_accum_spinning(start_spin_fast);
-
-	} while (unlikely(oldval != 0 &&
-			  (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
-
-	spin_time_accum_total(start_spin);
-}
-
-static void xen_spin_lock(struct arch_spinlock *lock)
-{
-	__xen_spin_lock(lock, false);
-}
-
-static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
-{
-	__xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
-}
-
-static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
+static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
 {
 	int cpu;

-	ADD_STATS(released_slow, 1);
+	add_stats(RELEASED_SLOW, 1);
+
+	for_each_cpu(cpu, &waiting_cpus) {
+		const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);

-	for_each_online_cpu(cpu) {
-		/* XXX should mix up next cpu selection */
-		if (per_cpu(lock_spinners, cpu) == xl) {
-			ADD_STATS(released_slow_kicked, 1);
+		if (w->lock == lock && w->want == next) {
+			add_stats(RELEASED_SLOW_KICKED, 1);
 			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+			break;
 		}
 	}
 }

-static void xen_spin_unlock(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	ADD_STATS(released, 1);
-
-	smp_wmb();		/* make sure no writes get moved after unlock */
-	xl->lock = 0;		/* release lock */
-
-	/*
-	 * Make sure unlock happens before checking for waiting
-	 * spinners.  We need a strong barrier to enforce the
-	 * write-read ordering to different memory locations, as the
-	 * CPU makes no implied guarantees about their ordering.
-	 */
-	mb();
-
-	if (unlikely(xl->spinners))
-		xen_spin_unlock_slow(xl);
-}
-#endif
-
 static irqreturn_t dummy_handler(int irq, void *dev_id)
 {
 	BUG();
@@ -420,15 +253,8 @@ void __init xen_init_spinlocks(void)
 	if (xen_hvm_domain())
 		return;

-	BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
-#if 0
-	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
-	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
-	pv_lock_ops.spin_lock = xen_spin_lock;
-	pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
-	pv_lock_ops.spin_trylock = xen_spin_trylock;
-	pv_lock_ops.spin_unlock = xen_spin_unlock;
-#endif
+	pv_lock_ops.lock_spinning = xen_lock_spinning;
+	pv_lock_ops.unlock_kick = xen_unlock_kick;
 }

 #ifdef CONFIG_XEN_DEBUG_FS
@@ -446,37 +272,21 @@ static int __init xen_spinlock_debugfs(void)

 	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);

-	debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
-
-	debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
 	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow);
-	debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_nested);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW]);
 	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_pickup);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
 	debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_spurious);
-	debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_irqenable);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]);

-	debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
 	debugfs_create_u32("released_slow", 0444, d_spin_debug,
-			   &spinlock_stats.released_slow);
+			   &spinlock_stats.contention_stats[RELEASED_SLOW]);
 	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
-			   &spinlock_stats.released_slow_kicked);
+			   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);

-	debugfs_create_u64("time_spinning", 0444, d_spin_debug,
-			   &spinlock_stats.time_spinning);
 	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
 			   &spinlock_stats.time_blocked);

-	debugfs_create_u64("time_total", 0444, d_spin_debug,
-			   &spinlock_stats.time_total);
-
-	debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
-				 spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
-	debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
-				 spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
 	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
 				 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
--
cgit

From b8fa70b51aa76737bdb6b493901ef7376977489c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Fri, 9 Aug 2013 19:51:54 +0530
Subject: xen, pvticketlocks: Add xen_nopvspin parameter to disable xen pv ticketlocks

Signed-off-by: Jeremy Fitzhardinge
Link: http://lkml.kernel.org/r/1376058122-8248-7-git-send-email-raghavendra.kt@linux.vnet.ibm.com
Reviewed-by: Konrad Rzeszutek Wilk
Signed-off-by: Raghavendra K T
Acked-by: Ingo Molnar
Signed-off-by: H. Peter Anvin
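The patch below registers xen_nopvspin as an early_param, so a guest
disables PV ticketlocks from its kernel command line.  A hypothetical
boot entry (the kernel path, version, and root device are placeholders
for illustration only) would look like:

        linux /boot/vmlinuz-3.11.0 root=/dev/xvda1 ro xen_nopvspin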
---
 arch/x86/xen/spinlock.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'arch/x86/xen')

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index a458729be25f..669a971c78fd 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -244,6 +244,8 @@ void xen_uninit_lock_cpu(int cpu)
 	per_cpu(irq_name, cpu) = NULL;
 }

+static bool xen_pvspin __initdata = true;
+
 void __init xen_init_spinlocks(void)
 {
 	/*
@@ -253,10 +255,22 @@ void __init xen_init_spinlocks(void)
 	if (xen_hvm_domain())
 		return;

+	if (!xen_pvspin) {
+		printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
+		return;
+	}
+
 	pv_lock_ops.lock_spinning = xen_lock_spinning;
 	pv_lock_ops.unlock_kick = xen_unlock_kick;
 }

+static __init int xen_parse_nopvspin(char *arg)
+{
+	xen_pvspin = false;
+	return 0;
+}
+early_param("xen_nopvspin", xen_parse_nopvspin);
+
 #ifdef CONFIG_XEN_DEBUG_FS

 static struct dentry *d_spin_debug;
--
cgit

From 354714dd2607778692db53947ab93b74956494e5 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Fri, 9 Aug 2013 19:51:55 +0530
Subject: x86, pvticketlock: Use callee-save for lock_spinning

Although the lock_spinning calls in the spinlock code are on the
uncommon path, their presence can cause the compiler to generate many
more register save/restores in the function pre/postamble, which is in
the fast path.  To avoid this, convert it to use the pvops callee-save
calling convention, which defers all the save/restores until the
actual function is called, keeping the fastpath clean.

Signed-off-by: Jeremy Fitzhardinge
Link: http://lkml.kernel.org/r/1376058122-8248-8-git-send-email-raghavendra.kt@linux.vnet.ibm.com
Reviewed-by: Konrad Rzeszutek Wilk
Tested-by: Attilio Rao
Signed-off-by: Raghavendra K T
Acked-by: Ingo Molnar
Signed-off-by: H. Peter Anvin
---
 arch/x86/xen/spinlock.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86/xen')

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 669a971c78fd..6c8792b298ed 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -173,6 +173,7 @@ out:
 	local_irq_restore(flags);
 	spin_time_accum_blocked(start);
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning);

 static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
 {
@@ -260,7 +261,7 @@ void __init xen_init_spinlocks(void)
 		return;
 	}

-	pv_lock_ops.lock_spinning = xen_lock_spinning;
+	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
 	pv_lock_ops.unlock_kick = xen_unlock_kick;
 }
--
cgit
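As a rough mental model of the callee-save convention: an ordinary
indirect pvop call forces the compiler to treat every caller-saved
register as clobbered at each call site, while the thunk generated by
PV_CALLEE_SAVE_REGS_THUNK saves and restores those registers itself.
The sketch below is illustrative only, not the real macro expansion
from asm/paravirt_types.h; the register list and label naming are
simplified assumptions:

        /* Illustrative expansion only; registers and naming simplified. */
        asm(".globl __raw_callee_save_xen_lock_spinning;"
            "__raw_callee_save_xen_lock_spinning:"
            "push %rcx; push %rdx; push %rsi; push %rdi;"
            "push %r8;  push %r9;  push %r10; push %r11;"
            "call xen_lock_spinning;"   /* args still live in %rdi/%rsi */
            "pop %r11;  pop %r10;  pop %r9;  pop %r8;"
            "pop %rdi;  pop %rsi;  pop %rdx; pop %rcx;"
            "ret;");

PV_CALLEE_SAVE(xen_lock_spinning) then points pv_lock_ops.lock_spinning
at the thunk rather than at the C function directly, so the inlined
lock fast path only has to assume minimal clobbers.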
From 96f853eaa889c7a22718d275b0df7bebdbd6780e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Fri, 9 Aug 2013 19:51:58 +0530
Subject: x86, ticketlock: Add slowpath logic

Maintain a flag in the LSB of the ticket lock tail which indicates
whether anyone is in the lock slowpath and may need kicking when the
current holder unlocks.  The flags are set when the first locker enters
the slowpath, and cleared when unlocking to an empty queue (ie, no
contention).

In the specific implementation of lock_spinning(), make sure to set the
slowpath flags on the lock just before blocking.  We must do this
before the last-chance pickup test to prevent a deadlock with the
unlocker:

	Unlocker		Locker
				test for lock pickup
					-> fail
	unlock
	test slowpath
		-> false

				set slowpath flags
				block

Whereas this works in any ordering:

	Unlocker		Locker
				set slowpath flags
				test for lock pickup
					-> fail
				block
	unlock
	test slowpath
		-> true, kick

If the unlocker finds that the lock has the slowpath flag set but it is
actually uncontended (ie, head == tail, so nobody is waiting), then it
clears the slowpath flag.

The unlock code uses a locked add to update the head counter.  This
also acts as a full memory barrier so that it is safe to subsequently
read back the slowflag state, knowing that the updated lock is visible
to the other CPUs.  If it were an unlocked add, then the flag read may
just be forwarded from the store buffer before it was visible to the
other CPUs, which could result in a deadlock.

Unfortunately this means we need to do a locked instruction when
unlocking with PV ticketlocks.  However, if PV ticketlocks are not
enabled, then the old non-locked "add" is the only unlocking code.

Note: this code relies on gcc making sure that unlikely() code is out
of line of the fastpath, which only happens when OPTIMIZE_SIZE=n.  If
it doesn't, the generated code isn't too bad, but it is definitely
suboptimal.

Thanks to Srivatsa Vaddagiri for providing a bugfix to the original
version of this change, which has been folded in.  Thanks to Stephan
Diestelhorst for commenting on some code which relied on an inaccurate
reading of the x86 memory ordering rules.

Signed-off-by: Jeremy Fitzhardinge
Link: http://lkml.kernel.org/r/1376058122-8248-11-git-send-email-raghavendra.kt@linux.vnet.ibm.com
Signed-off-by: Srivatsa Vaddagiri
Reviewed-by: Konrad Rzeszutek Wilk
Cc: Stephan Diestelhorst
Signed-off-by: Raghavendra K T
Acked-by: Ingo Molnar
Signed-off-by: H. Peter Anvin
---
 arch/x86/xen/spinlock.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/x86/xen')

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 6c8792b298ed..546112ed463f 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -157,6 +157,10 @@ static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 	/* Only check lock once pending cleared */
 	barrier();

+	/* Mark entry to slowpath before doing the pickup test to make
+	   sure we don't deadlock with an unlocker. */
+	__ticket_enter_slowpath(lock);
+
 	/* check again make sure it didn't become free while
 	   we weren't looking  */
 	if (ACCESS_ONCE(lock->tickets.head) == want) {
 		add_stats(TAKEN_SLOW_PICKUP, 1);
@@ -261,6 +265,8 @@ void __init xen_init_spinlocks(void)
 		return;
 	}

+	static_key_slow_inc(&paravirt_ticketlocks_enabled);
+
 	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
 	pv_lock_ops.unlock_kick = xen_unlock_kick;
 }
--
cgit
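For reference, here is a condensed sketch of the unlock fast path the
message above describes, adapted from the arch/x86/include/asm/spinlock.h
side of this series (simplified, not verbatim):

        static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
        {
                if (TICKET_SLOWPATH_FLAG &&
                    static_key_false(&paravirt_ticketlocks_enabled)) {
                        arch_spinlock_t prev = *lock;

                        /* Locked add: releases the lock *and* is a full
                           barrier, so the flag read below can't be satisfied
                           early from the store buffer. */
                        add_smp(&lock->tickets.head, TICKET_LOCK_INC);

                        if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
                                __ticket_unlock_slowpath(lock, prev); /* kick */
                } else
                        /* No PV ticketlocks: keep the old non-locked add. */
                        __add(&lock->tickets.head, TICKET_LOCK_INC,
                              UNLOCK_LOCK_PREFIX);
        }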
From 1ed7bf5f5227169b661c619636f754b98001ec30 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Fri, 9 Aug 2013 19:51:59 +0530
Subject: xen, pvticketlock: Allow interrupts to be enabled while blocking

If interrupts were enabled when taking the spinlock, we can leave them
enabled while blocking to get the lock.

If we can enable interrupts while waiting for the lock to become
available, and we take an interrupt before entering the poll, and the
handler takes a spinlock which ends up going into the slow state
(invalidating the per-cpu "lock" and "want" values), then when the
interrupt handler returns the event channel will remain pending, so the
poll will return immediately, causing it to return out to the main
spinlock loop.

Signed-off-by: Jeremy Fitzhardinge
Link: http://lkml.kernel.org/r/1376058122-8248-12-git-send-email-raghavendra.kt@linux.vnet.ibm.com
Reviewed-by: Konrad Rzeszutek Wilk
Signed-off-by: Raghavendra K T
Acked-by: Ingo Molnar
Signed-off-by: H. Peter Anvin
---
 arch/x86/xen/spinlock.c | 46 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 40 insertions(+), 6 deletions(-)

(limited to 'arch/x86/xen')

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 546112ed463f..0438b9324a72 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -142,7 +142,20 @@ static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 	 * partially setup state.
 	 */
 	local_irq_save(flags);
-
+	/*
+	 * We don't really care if we're overwriting some other
+	 * (lock,want) pair, as that would mean that we're currently
+	 * in an interrupt context, and the outer context had
+	 * interrupts enabled.  That has already kicked the VCPU out
+	 * of xen_poll_irq(), so it will just return spuriously and
+	 * retry with newly setup (lock,want).
+	 *
+	 * The ordering protocol on this is that the "lock" pointer
+	 * may only be set non-NULL if the "want" ticket is correct.
+	 * If we're updating "want", we must first clear "lock".
+	 */
+	w->lock = NULL;
+	smp_wmb();
 	w->want = want;
 	smp_wmb();
 	w->lock = lock;
@@ -157,24 +170,43 @@ static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 	/* Only check lock once pending cleared */
 	barrier();

-	/* Mark entry to slowpath before doing the pickup test to make
-	   sure we don't deadlock with an unlocker. */
+	/*
+	 * Mark entry to slowpath before doing the pickup test to make
+	 * sure we don't deadlock with an unlocker.
+	 */
 	__ticket_enter_slowpath(lock);

-	/* check again make sure it didn't become free while
-	   we weren't looking  */
+	/*
+	 * check again make sure it didn't become free while
+	 * we weren't looking
+	 */
 	if (ACCESS_ONCE(lock->tickets.head) == want) {
 		add_stats(TAKEN_SLOW_PICKUP, 1);
 		goto out;
 	}
+
+	/* Allow interrupts while blocked */
+	local_irq_restore(flags);
+
+	/*
+	 * If an interrupt happens here, it will leave the wakeup irq
+	 * pending, which will cause xen_poll_irq() to return
+	 * immediately.
+	 */
+
 	/* Block until irq becomes pending (or perhaps a spurious wakeup) */
 	xen_poll_irq(irq);
 	add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));
+
+	local_irq_save(flags);
+
 	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
 out:
 	cpumask_clear_cpu(cpu, &waiting_cpus);
 	w->lock = NULL;
+
 	local_irq_restore(flags);
+
 	spin_time_accum_blocked(start);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning);
@@ -188,7 +220,9 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
 	for_each_cpu(cpu, &waiting_cpus) {
 		const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);

-		if (w->lock == lock && w->want == next) {
+		/* Make sure we read lock before want */
+		if (ACCESS_ONCE(w->lock) == lock &&
+		    ACCESS_ONCE(w->want) == next) {
 			add_stats(RELEASED_SLOW_KICKED, 1);
 			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
 			break;
--
cgit
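One possible interleaving of the scenario this message describes (a
reconstruction for illustration; the two-column timeline is not text
from the patch):

	outer context                        interrupt handler
	-------------                        -----------------
	w->want = X; w->lock = A
	local_irq_restore()  <- irqs on
	                                     contends on lock B, enters the
	                                       slowpath: w->lock = NULL;
	                                       w->want = Y; w->lock = B
	                                     blocks in xen_poll_irq(), is
	                                       kicked (event channel pending),
	                                       sets w->lock = NULL, returns
	xen_poll_irq() returns at once
	  (the kick left the channel pending)
	treated as a spurious wakeup; falls
	  back out to the main spinlock loop,
	  which retries and re-registers (A, X)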