From d1be6a28b13ce6d1bc42bf9b6a9454c65839225b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 12:48:44 +0000 Subject: asm-generic/mmiowb: Add generic implementation of mmiowb() tracking In preparation for removing all explicit mmiowb() calls from driver code, implement a tracking system in asm-generic based loosely on the PowerPC implementation. This allows architectures with a non-empty mmiowb() definition to have the barrier automatically inserted in spin_unlock() following a critical section containing an I/O write. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- include/asm-generic/mmiowb.h | 63 ++++++++++++++++++++++++++++++++++++++ include/asm-generic/mmiowb_types.h | 12 ++++++++ 2 files changed, 75 insertions(+) create mode 100644 include/asm-generic/mmiowb.h create mode 100644 include/asm-generic/mmiowb_types.h (limited to 'include') diff --git a/include/asm-generic/mmiowb.h b/include/asm-generic/mmiowb.h new file mode 100644 index 000000000000..9439ff037b2d --- /dev/null +++ b/include/asm-generic/mmiowb.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_GENERIC_MMIOWB_H +#define __ASM_GENERIC_MMIOWB_H + +/* + * Generic implementation of mmiowb() tracking for spinlocks. + * + * If your architecture doesn't ensure that writes to an I/O peripheral + * within two spinlocked sections on two different CPUs are seen by the + * peripheral in the order corresponding to the lock handover, then you + * need to follow these FIVE easy steps: + * + * 1. Implement mmiowb() (and arch_mmiowb_state() if you're fancy) + * in asm/mmiowb.h, then #include this file + * 2. Ensure your I/O write accessors call mmiowb_set_pending() + * 3. Select ARCH_HAS_MMIOWB + * 4. Untangle the resulting mess of header files + * 5. Complain to your architects + */ +#ifdef CONFIG_MMIOWB + +#include +#include + +#ifndef arch_mmiowb_state +#include +#include + +DECLARE_PER_CPU(struct mmiowb_state, __mmiowb_state); +#define __mmiowb_state() this_cpu_ptr(&__mmiowb_state) +#else +#define __mmiowb_state() arch_mmiowb_state() +#endif /* arch_mmiowb_state */ + +static inline void mmiowb_set_pending(void) +{ + struct mmiowb_state *ms = __mmiowb_state(); + ms->mmiowb_pending = ms->nesting_count; +} + +static inline void mmiowb_spin_lock(void) +{ + struct mmiowb_state *ms = __mmiowb_state(); + ms->nesting_count++; +} + +static inline void mmiowb_spin_unlock(void) +{ + struct mmiowb_state *ms = __mmiowb_state(); + + if (unlikely(ms->mmiowb_pending)) { + ms->mmiowb_pending = 0; + mmiowb(); + } + + ms->nesting_count--; +} +#else +#define mmiowb_set_pending() do { } while (0) +#define mmiowb_spin_lock() do { } while (0) +#define mmiowb_spin_unlock() do { } while (0) +#endif /* CONFIG_MMIOWB */ +#endif /* __ASM_GENERIC_MMIOWB_H */ diff --git a/include/asm-generic/mmiowb_types.h b/include/asm-generic/mmiowb_types.h new file mode 100644 index 000000000000..8eb0095655e7 --- /dev/null +++ b/include/asm-generic/mmiowb_types.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_GENERIC_MMIOWB_TYPES_H +#define __ASM_GENERIC_MMIOWB_TYPES_H + +#include + +struct mmiowb_state { + u16 nesting_count; + u16 mmiowb_pending; +}; + +#endif /* __ASM_GENERIC_MMIOWB_TYPES_H */ -- cgit From 60ca1e5a200cd294a12907fa36dece4241db4ab8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 12:59:59 +0000 Subject: mmiowb: Hook up mmiowb helpers to spinlocks and generic I/O accessors Removing explicit calls to mmiowb() from driver code means that we must now call into the generic mmiowb_spin_{lock,unlock}() functions from the core spinlock code. In order to elide barriers following critical sections without any I/O writes, we also hook into the asm-generic I/O routines. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- include/asm-generic/io.h | 3 ++- include/linux/spinlock.h | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 303871651f8a..bc490a746602 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -19,6 +19,7 @@ #include #endif +#include #include #ifndef mmiowb @@ -49,7 +50,7 @@ /* serialize device access against a spin_unlock, usually handled there. */ #ifndef __io_aw -#define __io_aw() barrier() +#define __io_aw() mmiowb_set_pending() #endif #ifndef __io_pbw diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index e089157dcf97..ed7c4d6b8235 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -57,6 +57,7 @@ #include #include #include +#include /* @@ -178,6 +179,7 @@ static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock) { __acquire(lock); arch_spin_lock(&lock->raw_lock); + mmiowb_spin_lock(); } #ifndef arch_spin_lock_flags @@ -189,15 +191,22 @@ do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lo { __acquire(lock); arch_spin_lock_flags(&lock->raw_lock, *flags); + mmiowb_spin_lock(); } static inline int do_raw_spin_trylock(raw_spinlock_t *lock) { - return arch_spin_trylock(&(lock)->raw_lock); + int ret = arch_spin_trylock(&(lock)->raw_lock); + + if (ret) + mmiowb_spin_lock(); + + return ret; } static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) { + mmiowb_spin_unlock(); arch_spin_unlock(&lock->raw_lock); __release(lock); } -- cgit From fb24ea52f78e0d595852e09e3a55697c8f442189 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 17:14:59 +0000 Subject: drivers: Remove explicit invocations of mmiowb() mmiowb() is now implied by spin_unlock() on architectures that require it, so there is no reason to call it from driver code. This patch was generated using coccinelle: @mmiowb@ @@ - mmiowb(); and invoked as: $ for d in drivers include/linux/qed sound; do \ spatch --include-headers --sp-file mmiowb.cocci --dir $d --in-place; done NOTE: mmiowb() has only ever guaranteed ordering in conjunction with spin_unlock(). However, pairing each mmiowb() removal in this patch with the corresponding call to spin_unlock() is not at all trivial, so there is a small chance that this change may regress any drivers incorrectly relying on mmiowb() to order MMIO writes between CPUs using lock-free synchronisation. If you've ended up bisecting to this commit, you can reintroduce the mmiowb() calls using wmb() instead, which should restore the old behaviour on all architectures other than some esoteric ia64 systems. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- include/linux/qed/qed_if.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index f6165d304b4d..48841e5dab90 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -1338,7 +1338,6 @@ static inline u16 qed_sb_update_sb_idx(struct qed_sb_info *sb_info) } /* Let SB update */ - mmiowb(); return rc; } @@ -1374,7 +1373,6 @@ static inline void qed_sb_ack(struct qed_sb_info *sb_info, /* Both segments (interrupts & acks) are written to same place address; * Need to guarantee all commands will be received (in-order) by HW. */ - mmiowb(); barrier(); } -- cgit From 01e3b958efe85a26d9b1b77be3a0a1491bb4cb36 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 17:25:29 +0000 Subject: arch: Remove dummy mmiowb() definitions from arch code Now that no driver code is using mmiowb() directly, remove the dummy definitions remaining in architectures that don't make use of asm-generic/io.h, as well as the definition in asm-generic/io.h itself. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- include/asm-generic/io.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index bc490a746602..8f3bf95a36d1 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -22,10 +22,6 @@ #include #include -#ifndef mmiowb -#define mmiowb() do {} while (0) -#endif - #ifndef __io_br #define __io_br() barrier() #endif -- cgit