From 46ad0840b1584b92b5ff2cc3ed0b011dd6b8e0f1 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 22 Mar 2019 10:30:06 -0400 Subject: locking/rwsem: Remove arch specific rwsem files As the generic rwsem-xadd code is using the appropriate acquire and release versions of the atomic operations, the arch specific rwsem.h files will not be that much faster than the generic code as long as the atomic functions are properly implemented. So we can remove those arch specific rwsem.h and stop building asm/rwsem.h to reduce maintenance effort. Currently, only x86, alpha and ia64 have implemented architecture specific fast paths. I don't have access to alpha and ia64 systems for testing, but they are legacy systems that are not likely to be updated to the latest kernel anyway. By using a rwsem microbenchmark, the total locking rates on a 4-socket 56-core 112-thread x86-64 system before and after the patch were as follows (mixed means equal # of read and write locks): Before Patch After Patch # of Threads wlock rlock mixed wlock rlock mixed ------------ ----- ----- ----- ----- ----- ----- 1 29,201 30,143 29,458 28,615 30,172 29,201 2 6,807 13,299 1,171 7,725 15,025 1,804 4 6,504 12,755 1,520 7,127 14,286 1,345 8 6,762 13,412 764 6,826 13,652 726 16 6,693 15,408 662 6,599 15,938 626 32 6,145 15,286 496 5,549 15,487 511 64 5,812 15,495 60 5,858 15,572 60 There were some run-to-run variations for the multi-thread tests. For x86-64, using the generic C code fast path seems to be a little bit faster than the assembly version with low lock contention. Looking at the assembly version of the fast paths, there are assembly to/from C code wrappers that save and restore all the callee-clobbered registers (7 registers on x86-64). The assembly generated from the generic C code doesn't need to do that. That may explain the slight performance gain here. The generic asm rwsem.h can also be merged into kernel/locking/rwsem.h with no code change as no other code other than those under kernel/locking needs to access the internal rwsem macros and functions. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: H. Peter Anvin Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: linux-m68k@lists.linux-m68k.org Cc: linux-riscv@lists.infradead.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linuxppc-dev@lists.ozlabs.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: uclinux-h8-devel@lists.sourceforge.jp Link: https://lkml.kernel.org/r/20190322143008.21313-2-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.h | 130 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) (limited to 'kernel/locking/rwsem.h') diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index bad2bca0268b..067e265fa5c1 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -32,6 +32,26 @@ # define DEBUG_RWSEMS_WARN_ON(c) #endif +/* + * R/W semaphores originally for PPC using the stuff in lib/rwsem.c. + * Adapted largely from include/asm-i386/rwsem.h + * by Paul Mackerras . + */ + +/* + * the semaphore definition + */ +#ifdef CONFIG_64BIT +# define RWSEM_ACTIVE_MASK 0xffffffffL +#else +# define RWSEM_ACTIVE_MASK 0x0000ffffL +#endif + +#define RWSEM_ACTIVE_BIAS 0x00000001L +#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) +#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + #ifdef CONFIG_RWSEM_SPIN_ON_OWNER /* * All writes to owner are protected by WRITE_ONCE() to make sure that @@ -132,3 +152,113 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) { } #endif + +#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM +/* + * lock for reading + */ +static inline void __down_read(struct rw_semaphore *sem) +{ + if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) + rwsem_down_read_failed(sem); +} + +static inline int __down_read_killable(struct rw_semaphore *sem) +{ + if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { + if (IS_ERR(rwsem_down_read_failed_killable(sem))) + return -EINTR; + } + + return 0; +} + +static inline int __down_read_trylock(struct rw_semaphore *sem) +{ + long tmp; + + while ((tmp = atomic_long_read(&sem->count)) >= 0) { + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, + tmp + RWSEM_ACTIVE_READ_BIAS)) { + return 1; + } + } + return 0; +} + +/* + * lock for writing + */ +static inline void __down_write(struct rw_semaphore *sem) +{ + long tmp; + + tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, + &sem->count); + if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) + rwsem_down_write_failed(sem); +} + +static inline int __down_write_killable(struct rw_semaphore *sem) +{ + long tmp; + + tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, + &sem->count); + if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) + if (IS_ERR(rwsem_down_write_failed_killable(sem))) + return -EINTR; + return 0; +} + +static inline int __down_write_trylock(struct rw_semaphore *sem) +{ + long tmp; + + tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, + RWSEM_ACTIVE_WRITE_BIAS); + return tmp == RWSEM_UNLOCKED_VALUE; +} + +/* + * unlock after reading + */ +static inline void __up_read(struct rw_semaphore *sem) +{ + long tmp; + + tmp = atomic_long_dec_return_release(&sem->count); + if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) + rwsem_wake(sem); +} + +/* + * unlock after writing + */ +static inline void __up_write(struct rw_semaphore *sem) +{ + if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, + &sem->count) < 0)) + rwsem_wake(sem); +} + +/* + * downgrade write lock to read lock + */ +static inline void __downgrade_write(struct rw_semaphore *sem) +{ + long tmp; + + /* + * When downgrading from exclusive to shared ownership, + * anything inside the write-locked region cannot leak + * into the read side. In contrast, anything in the + * read-locked region is ok to be re-ordered into the + * write side. As such, rely on RELEASE semantics. + */ + tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); + if (tmp < 0) + rwsem_downgrade_wake(sem); +} + +#endif /* CONFIG_RWSEM_XCHGADD_ALGORITHM */ -- cgit From 390a0c62c23cb026cd4664a66f6f45fed3a215f6 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 22 Mar 2019 10:30:07 -0400 Subject: locking/rwsem: Remove rwsem-spinlock.c & use rwsem-xadd.c for all archs Currently, we have two different implementation of rwsem: 1) CONFIG_RWSEM_GENERIC_SPINLOCK (rwsem-spinlock.c) 2) CONFIG_RWSEM_XCHGADD_ALGORITHM (rwsem-xadd.c) As we are going to use a single generic implementation for rwsem-xadd.c and no architecture-specific code will be needed, there is no point in keeping two different implementations of rwsem. In most cases, the performance of rwsem-spinlock.c will be worse. It also doesn't get all the performance tuning and optimizations that had been implemented in rwsem-xadd.c over the years. For simplication, we are going to remove rwsem-spinlock.c and make all architectures use a single implementation of rwsem - rwsem-xadd.c. All references to RWSEM_GENERIC_SPINLOCK and RWSEM_XCHGADD_ALGORITHM in the code are removed. Suggested-by: Peter Zijlstra Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: H. Peter Anvin Cc: Paul E. McKenney Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: linux-m68k@lists.linux-m68k.org Cc: linux-riscv@lists.infradead.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linuxppc-dev@lists.ozlabs.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: uclinux-h8-devel@lists.sourceforge.jp Link: https://lkml.kernel.org/r/20190322143008.21313-3-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel/locking/rwsem.h') diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 067e265fa5c1..45ee00236e03 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -153,7 +153,6 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) } #endif -#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM /* * lock for reading */ @@ -260,5 +259,3 @@ static inline void __downgrade_write(struct rw_semaphore *sem) if (tmp < 0) rwsem_downgrade_wake(sem); } - -#endif /* CONFIG_RWSEM_XCHGADD_ALGORITHM */ -- cgit From ddb20d1d3aed8f130519c0a29cd5392efcc067b8 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 22 Mar 2019 10:30:08 -0400 Subject: locking/rwsem: Optimize down_read_trylock() Modify __down_read_trylock() to optimize for an unlocked rwsem and make it generate slightly better code. Before this patch, down_read_trylock: 0x0000000000000000 <+0>: callq 0x5 0x0000000000000005 <+5>: jmp 0x18 0x0000000000000007 <+7>: lea 0x1(%rdx),%rcx 0x000000000000000b <+11>: mov %rdx,%rax 0x000000000000000e <+14>: lock cmpxchg %rcx,(%rdi) 0x0000000000000013 <+19>: cmp %rax,%rdx 0x0000000000000016 <+22>: je 0x23 0x0000000000000018 <+24>: mov (%rdi),%rdx 0x000000000000001b <+27>: test %rdx,%rdx 0x000000000000001e <+30>: jns 0x7 0x0000000000000020 <+32>: xor %eax,%eax 0x0000000000000022 <+34>: retq 0x0000000000000023 <+35>: mov %gs:0x0,%rax 0x000000000000002c <+44>: or $0x3,%rax 0x0000000000000030 <+48>: mov %rax,0x20(%rdi) 0x0000000000000034 <+52>: mov $0x1,%eax 0x0000000000000039 <+57>: retq After patch, down_read_trylock: 0x0000000000000000 <+0>: callq 0x5 0x0000000000000005 <+5>: xor %eax,%eax 0x0000000000000007 <+7>: lea 0x1(%rax),%rdx 0x000000000000000b <+11>: lock cmpxchg %rdx,(%rdi) 0x0000000000000010 <+16>: jne 0x29 0x0000000000000012 <+18>: mov %gs:0x0,%rax 0x000000000000001b <+27>: or $0x3,%rax 0x000000000000001f <+31>: mov %rax,0x20(%rdi) 0x0000000000000023 <+35>: mov $0x1,%eax 0x0000000000000028 <+40>: retq 0x0000000000000029 <+41>: test %rax,%rax 0x000000000000002c <+44>: jns 0x7 0x000000000000002e <+46>: xor %eax,%eax 0x0000000000000030 <+48>: retq By using a rwsem microbenchmark, the down_read_trylock() rate (with a load of 10 to lengthen the lock critical section) on a x86-64 system before and after the patch were: Before Patch After Patch # of Threads rlock rlock ------------ ----- ----- 1 14,496 14,716 2 8,644 8,453 4 6,799 6,983 8 5,664 7,190 On a ARM64 system, the performance results were: Before Patch After Patch # of Threads rlock rlock ------------ ----- ----- 1 23,676 24,488 2 7,697 9,502 4 4,945 3,440 8 2,641 1,603 For the uncontended case (1 thread), the new down_read_trylock() is a little bit faster. For the contended cases, the new down_read_trylock() perform pretty well in x86-64, but performance degrades at high contention level on ARM64. Suggested-by: Linus Torvalds Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: H. Peter Anvin Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: linux-m68k@lists.linux-m68k.org Cc: linux-riscv@lists.infradead.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linuxppc-dev@lists.ozlabs.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: uclinux-h8-devel@lists.sourceforge.jp Link: https://lkml.kernel.org/r/20190322143008.21313-4-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'kernel/locking/rwsem.h') diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 45ee00236e03..1f5775aa6a1d 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -174,14 +174,17 @@ static inline int __down_read_killable(struct rw_semaphore *sem) static inline int __down_read_trylock(struct rw_semaphore *sem) { - long tmp; + /* + * Optimize for the case when the rwsem is not locked at all. + */ + long tmp = RWSEM_UNLOCKED_VALUE; - while ((tmp = atomic_long_read(&sem->count)) >= 0) { - if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, - tmp + RWSEM_ACTIVE_READ_BIAS)) { + do { + if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, + tmp + RWSEM_ACTIVE_READ_BIAS)) { return 1; } - } + } while (tmp >= 0); return 0; } -- cgit From c7580c1e84435c9ccc6c612d9fee8e71811f7be6 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:11 -0400 Subject: locking/rwsem: Move owner setting code from rwsem.c to rwsem.h Move all the owner setting code closer to the rwsem-xadd fast paths directly within rwsem.h file as well as in the slowpaths where owner setting is done after acquring the lock. This will enable us to add DEBUG_RWSEMS check in a later patch to make sure that read lock is really acquired when rwsem_down_read_failed() returns, for instance. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-3-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'kernel/locking/rwsem.h') diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 1f5775aa6a1d..ee24c4f257a5 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -160,6 +160,8 @@ static inline void __down_read(struct rw_semaphore *sem) { if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) rwsem_down_read_failed(sem); + else + rwsem_set_reader_owned(sem); } static inline int __down_read_killable(struct rw_semaphore *sem) @@ -167,8 +169,9 @@ static inline int __down_read_killable(struct rw_semaphore *sem) if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { if (IS_ERR(rwsem_down_read_failed_killable(sem))) return -EINTR; + } else { + rwsem_set_reader_owned(sem); } - return 0; } @@ -182,6 +185,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) do { if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, tmp + RWSEM_ACTIVE_READ_BIAS)) { + rwsem_set_reader_owned(sem); return 1; } } while (tmp >= 0); @@ -199,6 +203,7 @@ static inline void __down_write(struct rw_semaphore *sem) &sem->count); if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) rwsem_down_write_failed(sem); + rwsem_set_owner(sem); } static inline int __down_write_killable(struct rw_semaphore *sem) @@ -210,6 +215,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem) if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) if (IS_ERR(rwsem_down_write_failed_killable(sem))) return -EINTR; + rwsem_set_owner(sem); return 0; } @@ -219,7 +225,11 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); - return tmp == RWSEM_UNLOCKED_VALUE; + if (tmp == RWSEM_UNLOCKED_VALUE) { + rwsem_set_owner(sem); + return true; + } + return false; } /* @@ -229,6 +239,7 @@ static inline void __up_read(struct rw_semaphore *sem) { long tmp; + rwsem_clear_reader_owned(sem); tmp = atomic_long_dec_return_release(&sem->count); if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) rwsem_wake(sem); @@ -239,6 +250,7 @@ static inline void __up_read(struct rw_semaphore *sem) */ static inline void __up_write(struct rw_semaphore *sem) { + rwsem_clear_owner(sem); if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, &sem->count) < 0)) rwsem_wake(sem); @@ -259,6 +271,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) * write side. As such, rely on RELEASE semantics. */ tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); + rwsem_set_reader_owned(sem); if (tmp < 0) rwsem_downgrade_wake(sem); } -- cgit From 12a30a7fc142a123c61da9623bd824d95d36c12e Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:12 -0400 Subject: locking/rwsem: Move rwsem internal function declarations to rwsem-xadd.h We don't need to expose rwsem internal functions which are not supposed to be called directly from other kernel code. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Will Deacon Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Link: http://lkml.kernel.org/r/20190404174320.22416-4-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/locking/rwsem.h') diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index ee24c4f257a5..19997c82270b 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -153,6 +153,13 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) } #endif +extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); + /* * lock for reading */ -- cgit From a68e2c4c637918da47b3aa270051545cff7d8245 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:14 -0400 Subject: locking/rwsem: Add debug check for __down_read*() When rwsem_down_read_failed*() return, the read lock is acquired indirectly by others. So debug checks are added in __down_read() and __down_read_killable() to make sure the rwsem is really reader-owned. The other debug check calls in kernel/locking/rwsem.c except the one in up_read_non_owner() are also moved over to rwsem-xadd.h. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-6-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'kernel/locking/rwsem.h') diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 19997c82270b..1d8f722a6761 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -165,10 +165,13 @@ extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); */ static inline void __down_read(struct rw_semaphore *sem) { - if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) + if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { rwsem_down_read_failed(sem); - else + DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & + RWSEM_READER_OWNED)); + } else { rwsem_set_reader_owned(sem); + } } static inline int __down_read_killable(struct rw_semaphore *sem) @@ -176,6 +179,8 @@ static inline int __down_read_killable(struct rw_semaphore *sem) if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { if (IS_ERR(rwsem_down_read_failed_killable(sem))) return -EINTR; + DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & + RWSEM_READER_OWNED)); } else { rwsem_set_reader_owned(sem); } @@ -246,6 +251,7 @@ static inline void __up_read(struct rw_semaphore *sem) { long tmp; + DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); rwsem_clear_reader_owned(sem); tmp = atomic_long_dec_return_release(&sem->count); if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) @@ -257,6 +263,7 @@ static inline void __up_read(struct rw_semaphore *sem) */ static inline void __up_write(struct rw_semaphore *sem) { + DEBUG_RWSEMS_WARN_ON(sem->owner != current); rwsem_clear_owner(sem); if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, &sem->count) < 0)) @@ -277,6 +284,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) * read-locked region is ok to be re-ordered into the * write side. As such, rely on RELEASE semantics. */ + DEBUG_RWSEMS_WARN_ON(sem->owner != current); tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); rwsem_set_reader_owned(sem); if (tmp < 0) -- cgit From 3b4ba6643d26a95e08067fca9a5da1828f9afabf Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:15 -0400 Subject: locking/rwsem: Enhance DEBUG_RWSEMS_WARN_ON() macro Currently, the DEBUG_RWSEMS_WARN_ON() macro just dumps a stack trace when the rwsem isn't in the right state. It does not show the actual states of the rwsem. This may not be that helpful in the debugging process. Enhance the DEBUG_RWSEMS_WARN_ON() macro to also show the current content of the rwsem count and owner fields to give more information about what is wrong with the rwsem. The debug_locks_off() function is called as is done inside DEBUG_LOCKS_WARN_ON(). Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-7-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'kernel/locking/rwsem.h') diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 1d8f722a6761..3059a2dc39f8 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -27,9 +27,15 @@ #define RWSEM_ANONYMOUSLY_OWNED (1UL << 1) #ifdef CONFIG_DEBUG_RWSEMS -# define DEBUG_RWSEMS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c) +# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \ + if (WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\ + #c, atomic_long_read(&(sem)->count), \ + (long)((sem)->owner), (long)current, \ + list_empty(&(sem)->wait_list) ? "" : "not ")) \ + debug_locks_off(); \ + } while (0) #else -# define DEBUG_RWSEMS_WARN_ON(c) +# define DEBUG_RWSEMS_WARN_ON(c, sem) #endif /* @@ -168,7 +174,7 @@ static inline void __down_read(struct rw_semaphore *sem) if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { rwsem_down_read_failed(sem); DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & - RWSEM_READER_OWNED)); + RWSEM_READER_OWNED), sem); } else { rwsem_set_reader_owned(sem); } @@ -180,7 +186,7 @@ static inline int __down_read_killable(struct rw_semaphore *sem) if (IS_ERR(rwsem_down_read_failed_killable(sem))) return -EINTR; DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & - RWSEM_READER_OWNED)); + RWSEM_READER_OWNED), sem); } else { rwsem_set_reader_owned(sem); } @@ -251,7 +257,8 @@ static inline void __up_read(struct rw_semaphore *sem) { long tmp; - DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); + DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED), + sem); rwsem_clear_reader_owned(sem); tmp = atomic_long_dec_return_release(&sem->count); if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) @@ -263,7 +270,7 @@ static inline void __up_read(struct rw_semaphore *sem) */ static inline void __up_write(struct rw_semaphore *sem) { - DEBUG_RWSEMS_WARN_ON(sem->owner != current); + DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem); rwsem_clear_owner(sem); if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, &sem->count) < 0)) @@ -284,7 +291,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) * read-locked region is ok to be re-ordered into the * write side. As such, rely on RELEASE semantics. */ - DEBUG_RWSEMS_WARN_ON(sem->owner != current); + DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem); tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); rwsem_set_reader_owned(sem); if (tmp < 0) -- cgit From a8654596f0371c2604c4d475422c48f4fc6a56c9 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:19 -0400 Subject: locking/rwsem: Enable lock event counting Add lock event counting calls so that we can track the number of lock events happening in the rwsem code. With CONFIG_LOCK_EVENT_COUNTS on and booting a 4-socket 112-thread x86-64 system, the rwsem counts after system bootup were as follows: rwsem_opt_fail=261 rwsem_opt_wlock=50636 rwsem_rlock=445 rwsem_rlock_fail=0 rwsem_rlock_fast=22 rwsem_rtrylock=810144 rwsem_sleep_reader=441 rwsem_sleep_writer=310 rwsem_wake_reader=355 rwsem_wake_writer=2335 rwsem_wlock=261 rwsem_wlock_fail=0 rwsem_wtrylock=20583 It can be seen that most of the lock acquisitions in the slowpath were write-locks in the optimistic spinning code path with no sleeping at all. For this system, over 97% of the locks are acquired via optimistic spinning. It illustrates the importance of optimistic spinning in improving the performance of rwsem. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-11-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/locking/rwsem.h') diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 3059a2dc39f8..37db17890e36 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -23,6 +23,8 @@ * is involved. Ideally we would like to track all the readers that own * a rwsem, but the overhead is simply too big. */ +#include "lock_events.h" + #define RWSEM_READER_OWNED (1UL << 0) #define RWSEM_ANONYMOUSLY_OWNED (1UL << 1) @@ -200,6 +202,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) */ long tmp = RWSEM_UNLOCKED_VALUE; + lockevent_inc(rwsem_rtrylock); do { if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, tmp + RWSEM_ACTIVE_READ_BIAS)) { @@ -241,6 +244,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) { long tmp; + lockevent_inc(rwsem_wtrylock); tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); if (tmp == RWSEM_UNLOCKED_VALUE) { -- cgit From 26536e7c242e2b0f73c25c46fc50d2525ebe400b Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 13 Apr 2019 13:22:44 -0400 Subject: locking/rwsem: Prevent unneeded warning during locking selftest Disable the DEBUG_RWSEMS check when locking selftest is running with debug_locks_silent flag set. Signed-off-by: Waiman Long Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: huang ying Link: http://lkml.kernel.org/r/20190413172259.2740-2-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/locking/rwsem.h') diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 37db17890e36..64877f5294e3 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -30,7 +30,8 @@ #ifdef CONFIG_DEBUG_RWSEMS # define DEBUG_RWSEMS_WARN_ON(c, sem) do { \ - if (WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\ + if (!debug_locks_silent && \ + WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\ #c, atomic_long_read(&(sem)->count), \ (long)((sem)->owner), (long)current, \ list_empty(&(sem)->wait_list) ? "" : "not ")) \ -- cgit