diff options
-rw-r--r-- | Documentation/atomic_ops.txt | 4 | ||||
-rw-r--r-- | Documentation/locking/lockstat.txt | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/mcdi.c | 2 | ||||
-rw-r--r-- | drivers/phy/phy-rcar-gen2.c | 3 | ||||
-rw-r--r-- | drivers/staging/speakup/selection.c | 2 | ||||
-rw-r--r-- | include/asm-generic/qrwlock_types.h | 4 | ||||
-rw-r--r-- | kernel/futex.c | 13 | ||||
-rw-r--r-- | kernel/locking/osq_lock.c | 11 | ||||
-rw-r--r-- | kernel/locking/qrwlock.c | 8 | ||||
-rw-r--r-- | kernel/locking/qspinlock_paravirt.h | 6 |
10 files changed, 34 insertions, 21 deletions
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt index b19fc34efdb1..c9d1cacb4395 100644 --- a/Documentation/atomic_ops.txt +++ b/Documentation/atomic_ops.txt @@ -542,6 +542,10 @@ The routines xchg() and cmpxchg() must provide the same exact memory-barrier semantics as the atomic and bit operations returning values. +Note: If someone wants to use xchg(), cmpxchg() and their variants, +linux/atomic.h should be included rather than asm/cmpxchg.h, unless +the code is in arch/* and can take care of itself. + Spinlocks and rwlocks have memory barrier expectations as well. The rule to follow is simple: diff --git a/Documentation/locking/lockstat.txt b/Documentation/locking/lockstat.txt index 568bbbacee91..5786ad2cd5e6 100644 --- a/Documentation/locking/lockstat.txt +++ b/Documentation/locking/lockstat.txt @@ -12,7 +12,7 @@ Because things like lock contention can severely impact performance. - HOW Lockdep already has hooks in the lock functions and maps lock instances to -lock classes. We build on that (see Documentation/lokcing/lockdep-design.txt). +lock classes. We build on that (see Documentation/locking/lockdep-design.txt). The graph below shows the relation between the lock functions and the various hooks therein. diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 98d172b04f71..a9b9460de0d6 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -9,7 +9,7 @@ #include <linux/delay.h> #include <linux/moduleparam.h> -#include <asm/cmpxchg.h> +#include <linux/atomic.h> #include "net_driver.h" #include "nic.h" #include "io.h" diff --git a/drivers/phy/phy-rcar-gen2.c b/drivers/phy/phy-rcar-gen2.c index 6e0d9fa8e1d1..c7a05996d5c1 100644 --- a/drivers/phy/phy-rcar-gen2.c +++ b/drivers/phy/phy-rcar-gen2.c @@ -17,8 +17,7 @@ #include <linux/phy/phy.h> #include <linux/platform_device.h> #include <linux/spinlock.h> - -#include <asm/cmpxchg.h> +#include <linux/atomic.h> #define USBHS_LPSTS 0x02 #define USBHS_UGCTRL 0x80 diff --git a/drivers/staging/speakup/selection.c b/drivers/staging/speakup/selection.c index 98af3b1f2d2a..aa5ab6c80ed4 100644 --- a/drivers/staging/speakup/selection.c +++ b/drivers/staging/speakup/selection.c @@ -7,7 +7,7 @@ #include <linux/workqueue.h> #include <linux/tty.h> #include <linux/tty_flip.h> -#include <asm/cmpxchg.h> +#include <linux/atomic.h> #include "speakup.h" diff --git a/include/asm-generic/qrwlock_types.h b/include/asm-generic/qrwlock_types.h index 4d76f24df518..0abc6b6062fb 100644 --- a/include/asm-generic/qrwlock_types.h +++ b/include/asm-generic/qrwlock_types.h @@ -10,12 +10,12 @@ typedef struct qrwlock { atomic_t cnts; - arch_spinlock_t lock; + arch_spinlock_t wait_lock; } arch_rwlock_t; #define __ARCH_RW_LOCK_UNLOCKED { \ .cnts = ATOMIC_INIT(0), \ - .lock = __ARCH_SPIN_LOCK_UNLOCKED, \ + .wait_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ } #endif /* __ASM_GENERIC_QRWLOCK_TYPES_H */ diff --git a/kernel/futex.c b/kernel/futex.c index 6e443efc65f4..dfc86e93c31d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -255,9 +255,18 @@ struct futex_hash_bucket { struct plist_head chain; } ____cacheline_aligned_in_smp; -static unsigned long __read_mostly futex_hashsize; +/* + * The base of the bucket array and its size are always used together + * (after initialization only in hash_futex()), so ensure that they + * reside in the same cacheline. + */ +static struct { + struct futex_hash_bucket *queues; + unsigned long hashsize; +} __futex_data __read_mostly __aligned(2*sizeof(long)); +#define futex_queues (__futex_data.queues) +#define futex_hashsize (__futex_data.hashsize) -static struct futex_hash_bucket *futex_queues; /* * Fault injections for futexes. diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index dc85ee23a26f..d092a0c9c2d4 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -50,7 +50,7 @@ osq_wait_next(struct optimistic_spin_queue *lock, for (;;) { if (atomic_read(&lock->tail) == curr && - atomic_cmpxchg(&lock->tail, curr, old) == curr) { + atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) { /* * We were the last queued, we moved @lock back. @prev * will now observe @lock and will complete its @@ -92,7 +92,11 @@ bool osq_lock(struct optimistic_spin_queue *lock) node->next = NULL; node->cpu = curr; - old = atomic_xchg(&lock->tail, curr); + /* + * ACQUIRE semantics, pairs with corresponding RELEASE + * in unlock() uncontended, or fastpath. + */ + old = atomic_xchg_acquire(&lock->tail, curr); if (old == OSQ_UNLOCKED_VAL) return true; @@ -184,7 +188,8 @@ void osq_unlock(struct optimistic_spin_queue *lock) /* * Fast path for the uncontended case. */ - if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr)) + if (likely(atomic_cmpxchg_release(&lock->tail, curr, + OSQ_UNLOCKED_VAL) == curr)) return; /* diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index f17a3e3b3550..fec082338668 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -86,7 +86,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts) /* * Put the reader into the wait queue */ - arch_spin_lock(&lock->lock); + arch_spin_lock(&lock->wait_lock); /* * The ACQUIRE semantics of the following spinning code ensure @@ -99,7 +99,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts) /* * Signal the next one in queue to become queue head */ - arch_spin_unlock(&lock->lock); + arch_spin_unlock(&lock->wait_lock); } EXPORT_SYMBOL(queued_read_lock_slowpath); @@ -112,7 +112,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock) u32 cnts; /* Put the writer into the wait queue */ - arch_spin_lock(&lock->lock); + arch_spin_lock(&lock->wait_lock); /* Try to acquire the lock directly if no reader is present */ if (!atomic_read(&lock->cnts) && @@ -144,6 +144,6 @@ void queued_write_lock_slowpath(struct qrwlock *lock) cpu_relax_lowlatency(); } unlock: - arch_spin_unlock(&lock->lock); + arch_spin_unlock(&lock->wait_lock); } EXPORT_SYMBOL(queued_write_lock_slowpath); diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index c8e6e9a596f5..f0450ff4829b 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -267,7 +267,6 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node) } if (!lp) { /* ONCE */ - WRITE_ONCE(pn->state, vcpu_hashed); lp = pv_hash(lock, pn); /* @@ -275,11 +274,9 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node) * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock() * we'll be sure to be able to observe our hash entry. * - * [S] pn->state * [S] <hash> [Rmw] l->locked == _Q_SLOW_VAL * MB RMB * [RmW] l->locked = _Q_SLOW_VAL [L] <unhash> - * [L] pn->state * * Matches the smp_rmb() in __pv_queued_spin_unlock(). */ @@ -364,8 +361,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) * vCPU is harmless other than the additional latency in completing * the unlock. */ - if (READ_ONCE(node->state) == vcpu_hashed) - pv_kick(node->cpu); + pv_kick(node->cpu); } /* * Include the architecture specific callee-save thunk of the |