summaryrefslogtreecommitdiff
path: root/arch/s390/include/asm/barrier.h
diff options
context:
space:
mode:
authorHeiko Carstens <heiko.carstens@de.ibm.com>2014-09-08 08:20:43 +0200
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2014-09-09 08:53:30 +0200
commit442302820356977237e32a76a211e7942255003a (patch)
treecaf9d4d00753beab556f28f4f4cfac3d7f8b73e9 /arch/s390/include/asm/barrier.h
parent3d1e220d08c6a00ffa83d39030b8162f66665b2b (diff)
s390/spinlock: optimize spin_unlock code
Use a memory barrier + store sequence instead of a load + compare and swap sequence to unlock a spinlock and an rw lock. For the spinlock case this saves us two memory reads and a not needed cpu serialization after the compare and swap instruction stored the new value. The kernel size (performance_defconfig) gets reduced by ~14k. Average execution time of a tight inlined spin_unlock loop drops from 5.8ns to 0.7ns on a zEC12 machine. An artificial stress test case where several counters are protected with a single spinlock and which are only incremented while holding the spinlock shows ~30% improvement on a 4 cpu machine. Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/include/asm/barrier.h')
-rw-r--r--arch/s390/include/asm/barrier.h6
1 files changed, 4 insertions, 2 deletions
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 19ff956b752b..b5dce6544d76 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -15,11 +15,13 @@
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
/* Fast-BCR without checkpoint synchronization */
-#define mb() do { asm volatile("bcr 14,0" : : : "memory"); } while (0)
+#define __ASM_BARRIER "bcr 14,0\n"
#else
-#define mb() do { asm volatile("bcr 15,0" : : : "memory"); } while (0)
+#define __ASM_BARRIER "bcr 15,0\n"
#endif
+#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
+
#define rmb() mb()
#define wmb() mb()
#define read_barrier_depends() do { } while(0)