author    Linus Torvalds <torvalds@linux-foundation.org>  2023-06-27 14:14:30 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2023-06-27 14:14:30 -0700
commit    bc6cb4d5bc3a44197de30784eae71d8ba28483eb (patch)
tree      fdd00391c6068c217eeb8a4a06afc40cc1fc6853 /arch/x86
parent    ed3b7923a816ded62dccef377c9ee346c7d3b1b4 (diff)
parent    b33eb50a92b0a298fa8a6ac350e741c3ec100f6d (diff)
Merge tag 'locking-core-2023-06-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:

 - Introduce cmpxchg128() -- aka. the demise of cmpxchg_double()

   The cmpxchg128() family of functions is basically & functionally the
   same as cmpxchg_double(), but with a saner interface.

   Instead of a 6-parameter horror that forced u128 - u64/u64-halves
   layout details on the interface and exposed users to complexity,
   fragility & bugs, use a natural 3-parameter interface with u128 types.

 - Restructure the generated atomic headers, and add kerneldoc comments
   for all of the generic atomic{,64,_long}_t operations.

   The generated definitions are much cleaner now, and come with
   documentation.

 - Implement lock_set_cmp_fn() on lockdep, for defining an ordering when
   taking multiple locks of the same type.

   This gets rid of one use of lockdep_set_novalidate_class() in the
   bcache code.

 - Fix raw_cpu_generic_try_cmpxchg() bug due to an unintended variable
   shadowing generating garbage code on Clang on certain ARM builds.

* tag 'locking-core-2023-06-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (43 commits)
  locking/atomic: scripts: fix ${atomic}_dec_if_positive() kerneldoc
  percpu: Fix self-assignment of __old in raw_cpu_generic_try_cmpxchg()
  locking/atomic: treewide: delete arch_atomic_*() kerneldoc
  locking/atomic: docs: Add atomic operations to the driver basic API documentation
  locking/atomic: scripts: generate kerneldoc comments
  docs: scripts: kernel-doc: accept bitwise negation like ~@var
  locking/atomic: scripts: simplify raw_atomic*() definitions
  locking/atomic: scripts: simplify raw_atomic_long*() definitions
  locking/atomic: scripts: split pfx/name/sfx/order
  locking/atomic: scripts: restructure fallback ifdeffery
  locking/atomic: scripts: build raw_atomic_long*() directly
  locking/atomic: treewide: use raw_atomic*_<op>()
  locking/atomic: scripts: add trivial raw_atomic*_<op>()
  locking/atomic: scripts: factor out order template generation
  locking/atomic: scripts: remove leftover "${mult}"
  locking/atomic: scripts: remove bogus order parameter
  locking/atomic: xtensa: add preprocessor symbols
  locking/atomic: x86: add preprocessor symbols
  locking/atomic: sparc: add preprocessor symbols
  locking/atomic: sh: add preprocessor symbols
  ...
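As a rough illustration of the interface change described above (not taken from
the patch itself; the 'pair' struct, the callers and their arguments below are
hypothetical, and the generic wrapper names are assumed from the commit
message), the 6-parameter cmpxchg_double() call and its 3-parameter
cmpxchg128()-family replacement compare roughly as follows:

	/*
	 * Hedged sketch, not part of this merge: illustrative names only.
	 * cmpxchg16b requires the target to be 16-byte aligned.
	 */
	struct pair {
		u64 lo, hi;
	} __attribute__((aligned(16)));

	/* Old interface: two pointers plus the u64 halves of old and new. */
	static bool update_old_style(struct pair *p, struct pair old, struct pair new)
	{
		return cmpxchg_double(&p->lo, &p->hi,
				      old.lo, old.hi,
				      new.lo, new.hi);
	}

	/* New interface: one u128 pointer, one old value, one new value. */
	static bool update_new_style(u128 *p, u128 old, u128 new)
	{
		return try_cmpxchg128(p, &old, new);
	}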
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/atomic.h       |  87
-rw-r--r--  arch/x86/include/asm/atomic64_32.h  |  76
-rw-r--r--  arch/x86/include/asm/atomic64_64.h  |  81
-rw-r--r--  arch/x86/include/asm/cmpxchg.h      |  25
-rw-r--r--  arch/x86/include/asm/cmpxchg_32.h   |   2
-rw-r--r--  arch/x86/include/asm/cmpxchg_64.h   |  67
-rw-r--r--  arch/x86/include/asm/percpu.h       | 102
-rw-r--r--  arch/x86/kernel/alternative.c       |   4
-rw-r--r--  arch/x86/kernel/cpu/mce/core.c      |  16
-rw-r--r--  arch/x86/kernel/nmi.c               |   2
-rw-r--r--  arch/x86/kernel/pvclock.c           |   4
-rw-r--r--  arch/x86/kvm/x86.c                  |   2
-rw-r--r--  arch/x86/lib/Makefile               |   3
-rw-r--r--  arch/x86/lib/cmpxchg16b_emu.S       |  43
-rw-r--r--  arch/x86/lib/cmpxchg8b_emu.S        |  67
15 files changed, 222 insertions, 359 deletions
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 5e754e895767..55a55ec04350 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -14,12 +14,6 @@
* resource counting etc..
*/
-/**
- * arch_atomic_read - read atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically reads the value of @v.
- */
static __always_inline int arch_atomic_read(const atomic_t *v)
{
/*
@@ -29,25 +23,11 @@ static __always_inline int arch_atomic_read(const atomic_t *v)
return __READ_ONCE((v)->counter);
}
-/**
- * arch_atomic_set - set atomic variable
- * @v: pointer of type atomic_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
__WRITE_ONCE(v->counter, i);
}
-/**
- * arch_atomic_add - add integer to atomic variable
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v.
- */
static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "addl %1,%0"
@@ -55,13 +35,6 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v)
: "ir" (i) : "memory");
}
-/**
- * arch_atomic_sub - subtract integer from atomic variable
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v.
- */
static __always_inline void arch_atomic_sub(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "subl %1,%0"
@@ -69,27 +42,12 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
: "ir" (i) : "memory");
}
-/**
- * arch_atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i);
}
#define arch_atomic_sub_and_test arch_atomic_sub_and_test
-/**
- * arch_atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1.
- */
static __always_inline void arch_atomic_inc(atomic_t *v)
{
asm volatile(LOCK_PREFIX "incl %0"
@@ -97,12 +55,6 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
}
#define arch_atomic_inc arch_atomic_inc
-/**
- * arch_atomic_dec - decrement atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1.
- */
static __always_inline void arch_atomic_dec(atomic_t *v)
{
asm volatile(LOCK_PREFIX "decl %0"
@@ -110,69 +62,30 @@ static __always_inline void arch_atomic_dec(atomic_t *v)
}
#define arch_atomic_dec arch_atomic_dec
-/**
- * arch_atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
{
return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e);
}
#define arch_atomic_dec_and_test arch_atomic_dec_and_test
-/**
- * arch_atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
{
return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e);
}
#define arch_atomic_inc_and_test arch_atomic_inc_and_test
-/**
- * arch_atomic_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i);
}
#define arch_atomic_add_negative arch_atomic_add_negative
-/**
- * arch_atomic_add_return - add integer and return
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v and returns @i + @v
- */
static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
{
return i + xadd(&v->counter, i);
}
#define arch_atomic_add_return arch_atomic_add_return
-/**
- * arch_atomic_sub_return - subtract integer and return
- * @v: pointer of type atomic_t
- * @i: integer value to subtract
- *
- * Atomically subtracts @i from @v and returns @v - @i
- */
static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
{
return arch_atomic_add_return(-i, v);
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 808b4eece251..3486d91b8595 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -61,30 +61,12 @@ ATOMIC64_DECL(add_unless);
#undef __ATOMIC64_DECL
#undef ATOMIC64_EXPORT
-/**
- * arch_atomic64_cmpxchg - cmpxchg atomic64 variable
- * @v: pointer to type atomic64_t
- * @o: expected value
- * @n: new value
- *
- * Atomically sets @v to @n if it was equal to @o and returns
- * the old value.
- */
-
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
{
return arch_cmpxchg64(&v->counter, o, n);
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
-/**
- * arch_atomic64_xchg - xchg atomic64 variable
- * @v: pointer to type atomic64_t
- * @n: value to assign
- *
- * Atomically xchgs the value of @v to @n and returns
- * the old value.
- */
static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
{
s64 o;
@@ -97,13 +79,6 @@ static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
}
#define arch_atomic64_xchg arch_atomic64_xchg
-/**
- * arch_atomic64_set - set atomic64 variable
- * @v: pointer to type atomic64_t
- * @i: value to assign
- *
- * Atomically sets the value of @v to @n.
- */
static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
unsigned high = (unsigned)(i >> 32);
@@ -113,12 +88,6 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
: "eax", "edx", "memory");
}
-/**
- * arch_atomic64_read - read atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically reads the value of @v and returns it.
- */
static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
s64 r;
@@ -126,13 +95,6 @@ static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
return r;
}
-/**
- * arch_atomic64_add_return - add and return
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns @i + *@v
- */
static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
alternative_atomic64(add_return,
@@ -142,9 +104,6 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
}
#define arch_atomic64_add_return arch_atomic64_add_return
-/*
- * Other variants with different arithmetic operators:
- */
static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
{
alternative_atomic64(sub_return,
@@ -172,13 +131,6 @@ static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v)
}
#define arch_atomic64_dec_return arch_atomic64_dec_return
-/**
- * arch_atomic64_add - add integer to atomic64 variable
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v.
- */
static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
{
__alternative_atomic64(add, add_return,
@@ -187,13 +139,6 @@ static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
return i;
}
-/**
- * arch_atomic64_sub - subtract the atomic64 variable
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v.
- */
static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
{
__alternative_atomic64(sub, sub_return,
@@ -202,12 +147,6 @@ static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
return i;
}
-/**
- * arch_atomic64_inc - increment atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1.
- */
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
__alternative_atomic64(inc, inc_return, /* no output */,
@@ -215,12 +154,6 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
}
#define arch_atomic64_inc arch_atomic64_inc
-/**
- * arch_atomic64_dec - decrement atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1.
- */
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
__alternative_atomic64(dec, dec_return, /* no output */,
@@ -228,15 +161,6 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
}
#define arch_atomic64_dec arch_atomic64_dec
-/**
- * arch_atomic64_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns non-zero if the add was done, zero otherwise.
- */
static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
{
unsigned low = (unsigned)u;
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index c496595bf601..3165c0feedf7 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -10,37 +10,16 @@
#define ATOMIC64_INIT(i) { (i) }
-/**
- * arch_atomic64_read - read atomic64 variable
- * @v: pointer of type atomic64_t
- *
- * Atomically reads the value of @v.
- * Doesn't imply a read memory barrier.
- */
static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
return __READ_ONCE((v)->counter);
}
-/**
- * arch_atomic64_set - set atomic64 variable
- * @v: pointer to type atomic64_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
__WRITE_ONCE(v->counter, i);
}
-/**
- * arch_atomic64_add - add integer to atomic64 variable
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v.
- */
static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "addq %1,%0"
@@ -48,13 +27,6 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
: "er" (i), "m" (v->counter) : "memory");
}
-/**
- * arch_atomic64_sub - subtract the atomic64 variable
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v.
- */
static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "subq %1,%0"
@@ -62,27 +34,12 @@ static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
: "er" (i), "m" (v->counter) : "memory");
}
-/**
- * arch_atomic64_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i);
}
#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
-/**
- * arch_atomic64_inc - increment atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1.
- */
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "incq %0"
@@ -91,12 +48,6 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
}
#define arch_atomic64_inc arch_atomic64_inc
-/**
- * arch_atomic64_dec - decrement atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1.
- */
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "decq %0"
@@ -105,56 +56,24 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
}
#define arch_atomic64_dec arch_atomic64_dec
-/**
- * arch_atomic64_dec_and_test - decrement and test
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v)
{
return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e);
}
#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
-/**
- * arch_atomic64_inc_and_test - increment and test
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v)
{
return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e);
}
#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
-/**
- * arch_atomic64_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i);
}
#define arch_atomic64_add_negative arch_atomic64_add_negative
-/**
- * arch_atomic64_add_return - add and return
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns @i + @v
- */
static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
return i + xadd(&v->counter, i);
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 540573f515b7..d53636506134 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -239,29 +239,4 @@ extern void __add_wrong_size(void)
#define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock)
#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX)
-#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \
-({ \
- bool __ret; \
- __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \
- __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \
- BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
- BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
- VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \
- VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \
- asm volatile(pfx "cmpxchg%c5b %1" \
- CC_SET(e) \
- : CC_OUT(e) (__ret), \
- "+m" (*(p1)), "+m" (*(p2)), \
- "+a" (__old1), "+d" (__old2) \
- : "i" (2 * sizeof(long)), \
- "b" (__new1), "c" (__new2)); \
- __ret; \
-})
-
-#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \
- __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
-
-#define arch_cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
- __cmpxchg_double(, p1, p2, o1, o2, n1, n2)
-
#endif /* ASM_X86_CMPXCHG_H */
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 6ba80ce9438d..b5731c51f0f4 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -103,6 +103,6 @@ static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new)
#endif
-#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX8)
+#define system_has_cmpxchg64() boot_cpu_has(X86_FEATURE_CX8)
#endif /* _ASM_X86_CMPXCHG_32_H */
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 0d3beb27b7fe..44b08b53ab32 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -20,6 +20,71 @@
arch_try_cmpxchg((ptr), (po), (n)); \
})
-#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16)
+union __u128_halves {
+ u128 full;
+ struct {
+ u64 low, high;
+ };
+};
+
+#define __arch_cmpxchg128(_ptr, _old, _new, _lock) \
+({ \
+ union __u128_halves o = { .full = (_old), }, \
+ n = { .full = (_new), }; \
+ \
+ asm volatile(_lock "cmpxchg16b %[ptr]" \
+ : [ptr] "+m" (*(_ptr)), \
+ "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high) \
+ : "memory"); \
+ \
+ o.full; \
+})
+
+static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new)
+{
+ return __arch_cmpxchg128(ptr, old, new, LOCK_PREFIX);
+}
+#define arch_cmpxchg128 arch_cmpxchg128
+
+static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old, u128 new)
+{
+ return __arch_cmpxchg128(ptr, old, new,);
+}
+#define arch_cmpxchg128_local arch_cmpxchg128_local
+
+#define __arch_try_cmpxchg128(_ptr, _oldp, _new, _lock) \
+({ \
+ union __u128_halves o = { .full = *(_oldp), }, \
+ n = { .full = (_new), }; \
+ bool ret; \
+ \
+ asm volatile(_lock "cmpxchg16b %[ptr]" \
+ CC_SET(e) \
+ : CC_OUT(e) (ret), \
+ [ptr] "+m" (*ptr), \
+ "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high) \
+ : "memory"); \
+ \
+ if (unlikely(!ret)) \
+ *(_oldp) = o.full; \
+ \
+ likely(ret); \
+})
+
+static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp, u128 new)
+{
+ return __arch_try_cmpxchg128(ptr, oldp, new, LOCK_PREFIX);
+}
+#define arch_try_cmpxchg128 arch_try_cmpxchg128
+
+static __always_inline bool arch_try_cmpxchg128_local(volatile u128 *ptr, u128 *oldp, u128 new)
+{
+ return __arch_try_cmpxchg128(ptr, oldp, new,);
+}
+#define arch_try_cmpxchg128_local arch_try_cmpxchg128_local
+
+#define system_has_cmpxchg128() boot_cpu_has(X86_FEATURE_CX16)
#endif /* _ASM_X86_CMPXCHG_64_H */
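A hedged usage sketch of the try-variant added above (the caller and the
increment values are hypothetical, not part of the patch): on failure,
__arch_try_cmpxchg128() writes the value it observed back through *_oldp, so a
retry loop does not need a separate re-read:

	/* Hypothetical caller, for illustration only. */
	static void add_to_halves(u128 *val, u64 lo_inc, u64 hi_inc)
	{
		union __u128_halves old = { .full = *val }, new;

		do {
			new.low  = old.low  + lo_inc;
			new.high = old.high + hi_inc;
			/* On failure, old.full now holds the current *val. */
		} while (!arch_try_cmpxchg128(val, &old.full, new.full));
	}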
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 13c0d63ed55e..34734d730463 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -210,6 +210,67 @@ do { \
(typeof(_var))(unsigned long) pco_old__; \
})
+#if defined(CONFIG_X86_32) && !defined(CONFIG_UML)
+#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \
+({ \
+ union { \
+ u64 var; \
+ struct { \
+ u32 low, high; \
+ }; \
+ } old__, new__; \
+ \
+ old__.var = _oval; \
+ new__.var = _nval; \
+ \
+ asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \
+ "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+ : [var] "+m" (_var), \
+ "+a" (old__.low), \
+ "+d" (old__.high) \
+ : "b" (new__.low), \
+ "c" (new__.high) \
+ : "memory", "esi"); \
+ \
+ old__.var; \
+})
+
+#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, , pcp, oval, nval)
+#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
+#endif
+
+#ifdef CONFIG_X86_64
+#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval);
+#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval);
+
+#define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval) \
+({ \
+ union { \
+ u128 var; \
+ struct { \
+ u64 low, high; \
+ }; \
+ } old__, new__; \
+ \
+ old__.var = _oval; \
+ new__.var = _nval; \
+ \
+ asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \
+ "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+ : [var] "+m" (_var), \
+ "+a" (old__.low), \
+ "+d" (old__.high) \
+ : "b" (new__.low), \
+ "c" (new__.high) \
+ : "memory", "rsi"); \
+ \
+ old__.var; \
+})
+
+#define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval)
+#define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
+#endif
+
/*
* this_cpu_read() makes gcc load the percpu variable every time it is
* accessed while this_cpu_read_stable() allows the value to be cached.
@@ -290,23 +351,6 @@ do { \
#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval)
#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval)
-#ifdef CONFIG_X86_CMPXCHG64
-#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
-({ \
- bool __ret; \
- typeof(pcp1) __o1 = (o1), __n1 = (n1); \
- typeof(pcp2) __o2 = (o2), __n2 = (n2); \
- asm volatile("cmpxchg8b "__percpu_arg(1) \
- CC_SET(z) \
- : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \
- : "b" (__n1), "c" (__n2)); \
- __ret; \
-})
-
-#define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
-#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
-#endif /* CONFIG_X86_CMPXCHG64 */
-
/*
* Per cpu atomic 64 bit operations are only available under 64 bit.
* 32 bit must fall back to generic operations.
@@ -329,30 +373,6 @@ do { \
#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
-
-/*
- * Pretty complex macro to generate cmpxchg16 instruction. The instruction
- * is not supported on early AMD64 processors so we must be able to emulate
- * it in software. The address used in the cmpxchg16 instruction must be
- * aligned to a 16 byte boundary.
- */
-#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \
-({ \
- bool __ret; \
- typeof(pcp1) __o1 = (o1), __n1 = (n1); \
- typeof(pcp2) __o2 = (o2), __n2 = (n2); \
- alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
- "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \
- X86_FEATURE_CX16, \
- ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \
- "+m" (pcp2), "+d" (__o2)), \
- "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \
- __ret; \
-})
-
-#define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
-#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
-
#endif
static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
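A hedged sketch of how the per-CPU helpers added above would be invoked (the
per-CPU variable and values are hypothetical, not part of the patch): like the
other this_cpu_cmpxchg_*() operations, this_cpu_cmpxchg64() and
this_cpu_cmpxchg128() take the per-CPU variable itself and return the previous
value, so the caller compares the result against the expected value to detect
success:

	/* Hypothetical per-CPU variable, for illustration only. */
	static DEFINE_PER_CPU(u64, my_seq);

	static bool bump_my_seq(u64 expect, u64 next)
	{
		/* Equality with 'expect' means this CPU's swap won. */
		return this_cpu_cmpxchg64(my_seq, expect, next) == expect;
	}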
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a7e1ec50ad29..72646d75b6ff 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1967,7 +1967,7 @@ struct bp_patching_desc *try_get_desc(void)
{
struct bp_patching_desc *desc = &bp_desc;
- if (!arch_atomic_inc_not_zero(&desc->refs))
+ if (!raw_atomic_inc_not_zero(&desc->refs))
return NULL;
return desc;
@@ -1978,7 +1978,7 @@ static __always_inline void put_desc(void)
struct bp_patching_desc *desc = &bp_desc;
smp_mb__before_atomic();
- arch_atomic_dec(&desc->refs);
+ raw_atomic_dec(&desc->refs);
}
static __always_inline void *text_poke_addr(struct text_poke_loc *tp)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 22dfcb2adcd7..89e2aab5d34d 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1022,12 +1022,12 @@ static noinstr int mce_start(int *no_way_out)
if (!timeout)
return ret;
- arch_atomic_add(*no_way_out, &global_nwo);
+ raw_atomic_add(*no_way_out, &global_nwo);
/*
* Rely on the implied barrier below, such that global_nwo
* is updated before mce_callin.
*/
- order = arch_atomic_inc_return(&mce_callin);
+ order = raw_atomic_inc_return(&mce_callin);
arch_cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus);
/* Enable instrumentation around calls to external facilities */
@@ -1036,10 +1036,10 @@ static noinstr int mce_start(int *no_way_out)
/*
* Wait for everyone.
*/
- while (arch_atomic_read(&mce_callin) != num_online_cpus()) {
+ while (raw_atomic_read(&mce_callin) != num_online_cpus()) {
if (mce_timed_out(&timeout,
"Timeout: Not all CPUs entered broadcast exception handler")) {
- arch_atomic_set(&global_nwo, 0);
+ raw_atomic_set(&global_nwo, 0);
goto out;
}
ndelay(SPINUNIT);
@@ -1054,7 +1054,7 @@ static noinstr int mce_start(int *no_way_out)
/*
* Monarch: Starts executing now, the others wait.
*/
- arch_atomic_set(&mce_executing, 1);
+ raw_atomic_set(&mce_executing, 1);
} else {
/*
* Subject: Now start the scanning loop one by one in
@@ -1062,10 +1062,10 @@ static noinstr int mce_start(int *no_way_out)
* This way when there are any shared banks it will be
* only seen by one CPU before cleared, avoiding duplicates.
*/
- while (arch_atomic_read(&mce_executing) < order) {
+ while (raw_atomic_read(&mce_executing) < order) {
if (mce_timed_out(&timeout,
"Timeout: Subject CPUs unable to finish machine check processing")) {
- arch_atomic_set(&global_nwo, 0);
+ raw_atomic_set(&global_nwo, 0);
goto out;
}
ndelay(SPINUNIT);
@@ -1075,7 +1075,7 @@ static noinstr int mce_start(int *no_way_out)
/*
* Cache the global no_way_out state.
*/
- *no_way_out = arch_atomic_read(&global_nwo);
+ *no_way_out = raw_atomic_read(&global_nwo);
ret = order;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 776f4b1e395b..a0c551846b35 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -496,7 +496,7 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
*/
sev_es_nmi_complete();
if (IS_ENABLED(CONFIG_NMI_CHECK_CPU))
- arch_atomic_long_inc(&nsp->idt_calls);
+ raw_atomic_long_inc(&nsp->idt_calls);
if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
return;
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 56acf53a782a..b3f81379c2fc 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -101,11 +101,11 @@ u64 __pvclock_clocksource_read(struct pvclock_vcpu_time_info *src, bool dowd)
* updating at the same time, and one of them could be slightly behind,
* making the assumption that last_value always go forward fail to hold.
*/
- last = arch_atomic64_read(&last_value);
+ last = raw_atomic64_read(&last_value);
do {
if (ret <= last)
return last;
- } while (!arch_atomic64_try_cmpxchg(&last_value, &last, ret));
+ } while (!raw_atomic64_try_cmpxchg(&last_value, &last, ret));
return ret;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bc68a39efd70..7f70207e8689 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13161,7 +13161,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
{
- return arch_atomic_read(&kvm->arch.assigned_device_count);
+ return raw_atomic_read(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 01932af64193..ea3a28e7b613 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -61,8 +61,9 @@ ifeq ($(CONFIG_X86_32),y)
lib-y += strstr_32.o
lib-y += string_32.o
lib-y += memmove_32.o
+ lib-y += cmpxchg8b_emu.o
ifneq ($(CONFIG_X86_CMPXCHG64),y)
- lib-y += cmpxchg8b_emu.o atomic64_386_32.o
+ lib-y += atomic64_386_32.o
endif
else
obj-y += iomap_copy_64.o
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 33c70c0160ea..6962df315793 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -1,47 +1,54 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
#include <asm/percpu.h>
+#include <asm/processor-flags.h>
.text
/*
+ * Emulate 'cmpxchg16b %gs:(%rsi)'
+ *
* Inputs:
* %rsi : memory location to compare
* %rax : low 64 bits of old value
* %rdx : high 64 bits of old value
* %rbx : low 64 bits of new value
* %rcx : high 64 bits of new value
- * %al : Operation successful
+ *
+ * Notably this is not LOCK prefixed and is not safe against NMIs
*/
SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
-#
-# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
-# via the ZF. Caller will access %al to get result.
-#
-# Note that this is only useful for a cpuops operation. Meaning that we
-# do *not* have a fully atomic operation but just an operation that is
-# *atomic* on a single cpu (as provided by the this_cpu_xx class of
-# macros).
-#
pushfq
cli
- cmpq PER_CPU_VAR((%rsi)), %rax
- jne .Lnot_same
- cmpq PER_CPU_VAR(8(%rsi)), %rdx
- jne .Lnot_same
+ /* if (*ptr == old) */
+ cmpq PER_CPU_VAR(0(%rsi)), %rax
+ jne .Lnot_same
+ cmpq PER_CPU_VAR(8(%rsi)), %rdx
+ jne .Lnot_same
- movq %rbx, PER_CPU_VAR((%rsi))
- movq %rcx, PER_CPU_VAR(8(%rsi))
+ /* *ptr = new */
+ movq %rbx, PER_CPU_VAR(0(%rsi))
+ movq %rcx, PER_CPU_VAR(8(%rsi))
+
+ /* set ZF in EFLAGS to indicate success */
+ orl $X86_EFLAGS_ZF, (%rsp)
popfq
- mov $1, %al
RET
.Lnot_same:
+ /* *ptr != old */
+
+ /* old = *ptr */
+ movq PER_CPU_VAR(0(%rsi)), %rax
+ movq PER_CPU_VAR(8(%rsi)), %rdx
+
+ /* clear ZF in EFLAGS to indicate failure */
+ andl $(~X86_EFLAGS_ZF), (%rsp)
+
popfq
- xor %al,%al
RET
SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
index 6a912d58fecc..49805257b125 100644
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -2,10 +2,16 @@
#include <linux/linkage.h>
#include <asm/export.h>
+#include <asm/percpu.h>
+#include <asm/processor-flags.h>
.text
+#ifndef CONFIG_X86_CMPXCHG64
+
/*
+ * Emulate 'cmpxchg8b (%esi)' on UP
+ *
* Inputs:
* %esi : memory location to compare
* %eax : low 32 bits of old value
@@ -15,32 +21,65 @@
*/
SYM_FUNC_START(cmpxchg8b_emu)
-#
-# Emulate 'cmpxchg8b (%esi)' on UP except we don't
-# set the whole ZF thing (caller will just compare
-# eax:edx with the expected value)
-#
pushfl
cli
- cmpl (%esi), %eax
- jne .Lnot_same
- cmpl 4(%esi), %edx
- jne .Lhalf_same
+ cmpl 0(%esi), %eax
+ jne .Lnot_same
+ cmpl 4(%esi), %edx
+ jne .Lnot_same
+
+ movl %ebx, 0(%esi)
+ movl %ecx, 4(%esi)
- movl %ebx, (%esi)
- movl %ecx, 4(%esi)
+ orl $X86_EFLAGS_ZF, (%esp)
popfl
RET
.Lnot_same:
- movl (%esi), %eax
-.Lhalf_same:
- movl 4(%esi), %edx
+ movl 0(%esi), %eax
+ movl 4(%esi), %edx
+
+ andl $(~X86_EFLAGS_ZF), (%esp)
popfl
RET
SYM_FUNC_END(cmpxchg8b_emu)
EXPORT_SYMBOL(cmpxchg8b_emu)
+
+#endif
+
+#ifndef CONFIG_UML
+
+SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
+
+ pushfl
+ cli
+
+ cmpl PER_CPU_VAR(0(%esi)), %eax
+ jne .Lnot_same2
+ cmpl PER_CPU_VAR(4(%esi)), %edx
+ jne .Lnot_same2
+
+ movl %ebx, PER_CPU_VAR(0(%esi))
+ movl %ecx, PER_CPU_VAR(4(%esi))
+
+ orl $X86_EFLAGS_ZF, (%esp)
+
+ popfl
+ RET
+
+.Lnot_same2:
+ movl PER_CPU_VAR(0(%esi)), %eax
+ movl PER_CPU_VAR(4(%esi)), %edx
+
+ andl $(~X86_EFLAGS_ZF), (%esp)
+
+ popfl
+ RET
+
+SYM_FUNC_END(this_cpu_cmpxchg8b_emu)
+
+#endif