summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Documentation/atomic_ops.txt | 4
-rw-r--r-- Documentation/locking/lockstat.txt | 2
-rw-r--r-- drivers/net/ethernet/sfc/mcdi.c | 2
-rw-r--r-- drivers/phy/phy-rcar-gen2.c | 3
-rw-r--r-- drivers/staging/speakup/selection.c | 2
-rw-r--r-- include/asm-generic/qrwlock_types.h | 4
-rw-r--r-- kernel/futex.c | 13
-rw-r--r-- kernel/locking/osq_lock.c | 11
-rw-r--r-- kernel/locking/qrwlock.c | 8
-rw-r--r-- kernel/locking/qspinlock_paravirt.h | 6
10 files changed, 34 insertions, 21 deletions
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index b19fc34efdb1..c9d1cacb4395 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -542,6 +542,10 @@ The routines xchg() and cmpxchg() must provide the same exact
memory-barrier semantics as the atomic and bit operations returning
values.
+Note: If someone wants to use xchg(), cmpxchg() and their variants,
+linux/atomic.h should be included rather than asm/cmpxchg.h, unless
+the code is in arch/* and can take care of itself.
+
Spinlocks and rwlocks have memory barrier expectations as well.
The rule to follow is simple:
diff --git a/Documentation/locking/lockstat.txt b/Documentation/locking/lockstat.txt
index 568bbbacee91..5786ad2cd5e6 100644
--- a/Documentation/locking/lockstat.txt
+++ b/Documentation/locking/lockstat.txt
@@ -12,7 +12,7 @@ Because things like lock contention can severely impact performance.
- HOW
Lockdep already has hooks in the lock functions and maps lock instances to
-lock classes. We build on that (see Documentation/lokcing/lockdep-design.txt).
+lock classes. We build on that (see Documentation/locking/lockdep-design.txt).
The graph below shows the relation between the lock functions and the various
hooks therein.
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 98d172b04f71..a9b9460de0d6 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -9,7 +9,7 @@
#include <linux/delay.h>
#include <linux/moduleparam.h>
-#include <asm/cmpxchg.h>
+#include <linux/atomic.h>
#include "net_driver.h"
#include "nic.h"
#include "io.h"
diff --git a/drivers/phy/phy-rcar-gen2.c b/drivers/phy/phy-rcar-gen2.c
index 6e0d9fa8e1d1..c7a05996d5c1 100644
--- a/drivers/phy/phy-rcar-gen2.c
+++ b/drivers/phy/phy-rcar-gen2.c
@@ -17,8 +17,7 @@
#include <linux/phy/phy.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
-
-#include <asm/cmpxchg.h>
+#include <linux/atomic.h>
#define USBHS_LPSTS 0x02
#define USBHS_UGCTRL 0x80
diff --git a/drivers/staging/speakup/selection.c b/drivers/staging/speakup/selection.c
index 98af3b1f2d2a..aa5ab6c80ed4 100644
--- a/drivers/staging/speakup/selection.c
+++ b/drivers/staging/speakup/selection.c
@@ -7,7 +7,7 @@
#include <linux/workqueue.h>
#include <linux/tty.h>
#include <linux/tty_flip.h>
-#include <asm/cmpxchg.h>
+#include <linux/atomic.h>
#include "speakup.h"
diff --git a/include/asm-generic/qrwlock_types.h b/include/asm-generic/qrwlock_types.h
index 4d76f24df518..0abc6b6062fb 100644
--- a/include/asm-generic/qrwlock_types.h
+++ b/include/asm-generic/qrwlock_types.h
@@ -10,12 +10,12 @@
typedef struct qrwlock {
atomic_t cnts;
- arch_spinlock_t lock;
+ arch_spinlock_t wait_lock;
} arch_rwlock_t;
#define __ARCH_RW_LOCK_UNLOCKED { \
.cnts = ATOMIC_INIT(0), \
- .lock = __ARCH_SPIN_LOCK_UNLOCKED, \
+ .wait_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
}
#endif /* __ASM_GENERIC_QRWLOCK_TYPES_H */
diff --git a/kernel/futex.c b/kernel/futex.c
index 6e443efc65f4..dfc86e93c31d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -255,9 +255,18 @@ struct futex_hash_bucket {
struct plist_head chain;
} ____cacheline_aligned_in_smp;
-static unsigned long __read_mostly futex_hashsize;
+/*
+ * The base of the bucket array and its size are always used together
+ * (after initialization only in hash_futex()), so ensure that they
+ * reside in the same cacheline.
+ */
+static struct {
+ struct futex_hash_bucket *queues;
+ unsigned long hashsize;
+} __futex_data __read_mostly __aligned(2*sizeof(long));
+#define futex_queues (__futex_data.queues)
+#define futex_hashsize (__futex_data.hashsize)
-static struct futex_hash_bucket *futex_queues;
/*
* Fault injections for futexes.
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index dc85ee23a26f..d092a0c9c2d4 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -50,7 +50,7 @@ osq_wait_next(struct optimistic_spin_queue *lock,
for (;;) {
if (atomic_read(&lock->tail) == curr &&
- atomic_cmpxchg(&lock->tail, curr, old) == curr) {
+ atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) {
/*
* We were the last queued, we moved @lock back. @prev
* will now observe @lock and will complete its
@@ -92,7 +92,11 @@ bool osq_lock(struct optimistic_spin_queue *lock)
node->next = NULL;
node->cpu = curr;
- old = atomic_xchg(&lock->tail, curr);
+ /*
+ * ACQUIRE semantics: pairs with the corresponding RELEASE in the
+ * uncontended fast path of osq_unlock().
+ */
+ old = atomic_xchg_acquire(&lock->tail, curr);
if (old == OSQ_UNLOCKED_VAL)
return true;
@@ -184,7 +188,8 @@ void osq_unlock(struct optimistic_spin_queue *lock)
/*
* Fast path for the uncontended case.
*/
- if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
+ if (likely(atomic_cmpxchg_release(&lock->tail, curr,
+ OSQ_UNLOCKED_VAL) == curr))
return;
/*
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index f17a3e3b3550..fec082338668 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -86,7 +86,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
/*
* Put the reader into the wait queue
*/
- arch_spin_lock(&lock->lock);
+ arch_spin_lock(&lock->wait_lock);
/*
* The ACQUIRE semantics of the following spinning code ensure
@@ -99,7 +99,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
/*
* Signal the next one in queue to become queue head
*/
- arch_spin_unlock(&lock->lock);
+ arch_spin_unlock(&lock->wait_lock);
}
EXPORT_SYMBOL(queued_read_lock_slowpath);
@@ -112,7 +112,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
u32 cnts;
/* Put the writer into the wait queue */
- arch_spin_lock(&lock->lock);
+ arch_spin_lock(&lock->wait_lock);
/* Try to acquire the lock directly if no reader is present */
if (!atomic_read(&lock->cnts) &&
@@ -144,6 +144,6 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
cpu_relax_lowlatency();
}
unlock:
- arch_spin_unlock(&lock->lock);
+ arch_spin_unlock(&lock->wait_lock);
}
EXPORT_SYMBOL(queued_write_lock_slowpath);
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index c8e6e9a596f5..f0450ff4829b 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -267,7 +267,6 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
}
if (!lp) { /* ONCE */
- WRITE_ONCE(pn->state, vcpu_hashed);
lp = pv_hash(lock, pn);
/*
@@ -275,11 +274,9 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
* when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
* we'll be sure to be able to observe our hash entry.
*
- * [S] pn->state
* [S] <hash> [Rmw] l->locked == _Q_SLOW_VAL
* MB RMB
* [RmW] l->locked = _Q_SLOW_VAL [L] <unhash>
- * [L] pn->state
*
* Matches the smp_rmb() in __pv_queued_spin_unlock().
*/
@@ -364,8 +361,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
* vCPU is harmless other than the additional latency in completing
* the unlock.
*/
- if (READ_ONCE(node->state) == vcpu_hashed)
- pv_kick(node->cpu);
+ pv_kick(node->cpu);
}
/*
* Include the architecture specific callee-save thunk of the