summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 14:39:35 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 14:39:35 -0700
commit9ba19ccd2d283a79dd29e8130819c59beca80f62 (patch)
tree8dece4ee137bc9f7a3e5f31ce5613f9b82b7d260 /include
parent8f0cb6660acb0d4756df880a3e60e73daa9c244e (diff)
parent992414a18cd4de05fa3f8ff7e1c29af758bdee1a (diff)
Merge tag 'locking-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar: - LKMM updates: mostly documentation changes, but also some new litmus tests for atomic ops. - KCSAN updates: the most important change is that GCC 11 now has all fixes in place to support KCSAN, so GCC support can be enabled again. Also more annotations. - futex updates: minor cleanups and simplifications - seqlock updates: merge preparatory changes/cleanups for the 'associated locks' facilities. - lockdep updates: - simplify IRQ trace event handling - add various new debug checks - simplify header dependencies, split out <linux/lockdep_types.h>, decouple lockdep from other low level headers some more - fix NMI handling - misc cleanups and smaller fixes * tag 'locking-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (60 commits) kcsan: Improve IRQ state trace reporting lockdep: Refactor IRQ trace events fields into struct seqlock: lockdep assert non-preemptibility on seqcount_t write lockdep: Add preemption enabled/disabled assertion APIs seqlock: Implement raw_seqcount_begin() in terms of raw_read_seqcount() seqlock: Add kernel-doc for seqcount_t and seqlock_t APIs seqlock: Reorder seqcount_t and seqlock_t API definitions seqlock: seqcount_t latch: End read sections with read_seqcount_retry() seqlock: Properly format kernel-doc code samples Documentation: locking: Describe seqlock design and usage locking/qspinlock: Do not include atomic.h from qspinlock_types.h locking/atomic: Move ATOMIC_INIT into linux/types.h lockdep: Move list.h inclusion into lockdep.h locking/lockdep: Fix TRACE_IRQFLAGS vs. NMIs futex: Remove unused or redundant includes futex: Consistently use fshared as boolean futex: Remove needless goto's futex: Remove put_futex_key() rwsem: fix commas in initialisation docs: locking: Replace HTTP links with HTTPS ones ...
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/atomic.h2
-rw-r--r--include/asm-generic/qspinlock.h1
-rw-r--r--include/asm-generic/qspinlock_types.h8
-rw-r--r--include/linux/hardirq.h28
-rw-r--r--include/linux/irqflags.h36
-rw-r--r--include/linux/lockdep.h230
-rw-r--r--include/linux/lockdep_types.h194
-rw-r--r--include/linux/rculist.h2
-rw-r--r--include/linux/rwsem.h20
-rw-r--r--include/linux/sched.h17
-rw-r--r--include/linux/seqlock.h756
-rw-r--r--include/linux/spinlock.h1
-rw-r--r--include/linux/spinlock_types.h2
-rw-r--r--include/linux/types.h2
14 files changed, 823 insertions, 476 deletions
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 286867f593d2..11f96f40f4a7 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -159,8 +159,6 @@ ATOMIC_OP(xor, ^)
* resource counting etc..
*/
-#define ATOMIC_INIT(i) { (i) }
-
/**
* atomic_read - read atomic variable
* @v: pointer of type atomic_t
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index fde943d180e0..2b26cd729b94 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -11,6 +11,7 @@
#define __ASM_GENERIC_QSPINLOCK_H
#include <asm-generic/qspinlock_types.h>
+#include <linux/atomic.h>
/**
* queued_spin_is_locked - is the spinlock locked?
diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
index 56d1309d32f8..2fd1fb89ec36 100644
--- a/include/asm-generic/qspinlock_types.h
+++ b/include/asm-generic/qspinlock_types.h
@@ -9,15 +9,7 @@
#ifndef __ASM_GENERIC_QSPINLOCK_TYPES_H
#define __ASM_GENERIC_QSPINLOCK_TYPES_H
-/*
- * Including atomic.h with PARAVIRT on will cause compilation errors because
- * of recursive header file incluson via paravirt_types.h. So don't include
- * it if PARAVIRT is on.
- */
-#ifndef CONFIG_PARAVIRT
#include <linux/types.h>
-#include <linux/atomic.h>
-#endif
typedef struct qspinlock {
union {
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 03c9fece7d43..754f67ac4326 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -111,32 +111,42 @@ extern void rcu_nmi_exit(void);
/*
* nmi_enter() can nest up to 15 times; see NMI_BITS.
*/
-#define nmi_enter() \
+#define __nmi_enter() \
do { \
+ lockdep_off(); \
arch_nmi_enter(); \
printk_nmi_enter(); \
- lockdep_off(); \
BUG_ON(in_nmi() == NMI_MASK); \
__preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
- rcu_nmi_enter(); \
+ } while (0)
+
+#define nmi_enter() \
+ do { \
+ __nmi_enter(); \
lockdep_hardirq_enter(); \
+ rcu_nmi_enter(); \
instrumentation_begin(); \
ftrace_nmi_enter(); \
instrumentation_end(); \
} while (0)
+#define __nmi_exit() \
+ do { \
+ BUG_ON(!in_nmi()); \
+ __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
+ printk_nmi_exit(); \
+ arch_nmi_exit(); \
+ lockdep_on(); \
+ } while (0)
+
#define nmi_exit() \
do { \
instrumentation_begin(); \
ftrace_nmi_exit(); \
instrumentation_end(); \
- lockdep_hardirq_exit(); \
rcu_nmi_exit(); \
- BUG_ON(!in_nmi()); \
- __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
- lockdep_on(); \
- printk_nmi_exit(); \
- arch_nmi_exit(); \
+ lockdep_hardirq_exit(); \
+ __nmi_exit(); \
} while (0)
#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 6384d2813ded..bd5c55755447 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -14,6 +14,7 @@
#include <linux/typecheck.h>
#include <asm/irqflags.h>
+#include <asm/percpu.h>
/* Currently lockdep_softirqs_on/off is used only by lockdep */
#ifdef CONFIG_PROVE_LOCKING
@@ -31,18 +32,35 @@
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
+
+/* Per-task IRQ trace events information. */
+struct irqtrace_events {
+ unsigned int irq_events;
+ unsigned long hardirq_enable_ip;
+ unsigned long hardirq_disable_ip;
+ unsigned int hardirq_enable_event;
+ unsigned int hardirq_disable_event;
+ unsigned long softirq_disable_ip;
+ unsigned long softirq_enable_ip;
+ unsigned int softirq_disable_event;
+ unsigned int softirq_enable_event;
+};
+
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);
+
extern void trace_hardirqs_on_prepare(void);
extern void trace_hardirqs_off_finish(void);
extern void trace_hardirqs_on(void);
extern void trace_hardirqs_off(void);
-# define lockdep_hardirq_context(p) ((p)->hardirq_context)
+# define lockdep_hardirq_context() (this_cpu_read(hardirq_context))
# define lockdep_softirq_context(p) ((p)->softirq_context)
-# define lockdep_hardirqs_enabled(p) ((p)->hardirqs_enabled)
+# define lockdep_hardirqs_enabled() (this_cpu_read(hardirqs_enabled))
# define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled)
-# define lockdep_hardirq_enter() \
-do { \
- if (!current->hardirq_context++) \
- current->hardirq_threaded = 0; \
+# define lockdep_hardirq_enter() \
+do { \
+ if (this_cpu_inc_return(hardirq_context) == 1) \
+ current->hardirq_threaded = 0; \
} while (0)
# define lockdep_hardirq_threaded() \
do { \
@@ -50,7 +68,7 @@ do { \
} while (0)
# define lockdep_hardirq_exit() \
do { \
- current->hardirq_context--; \
+ this_cpu_dec(hardirq_context); \
} while (0)
# define lockdep_softirq_enter() \
do { \
@@ -104,9 +122,9 @@ do { \
# define trace_hardirqs_off_finish() do { } while (0)
# define trace_hardirqs_on() do { } while (0)
# define trace_hardirqs_off() do { } while (0)
-# define lockdep_hardirq_context(p) 0
+# define lockdep_hardirq_context() 0
# define lockdep_softirq_context(p) 0
-# define lockdep_hardirqs_enabled(p) 0
+# define lockdep_hardirqs_enabled() 0
# define lockdep_softirqs_enabled(p) 0
# define lockdep_hardirq_enter() do { } while (0)
# define lockdep_hardirq_threaded() do { } while (0)
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 8fce5c98a4b0..39a35699d0d6 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -10,33 +10,15 @@
#ifndef __LINUX_LOCKDEP_H
#define __LINUX_LOCKDEP_H
+#include <linux/lockdep_types.h>
+#include <asm/percpu.h>
+
struct task_struct;
-struct lockdep_map;
/* for sysctl */
extern int prove_locking;
extern int lock_stat;
-#define MAX_LOCKDEP_SUBCLASSES 8UL
-
-#include <linux/types.h>
-
-enum lockdep_wait_type {
- LD_WAIT_INV = 0, /* not checked, catch all */
-
- LD_WAIT_FREE, /* wait free, rcu etc.. */
- LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */
-
-#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
- LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
-#else
- LD_WAIT_CONFIG = LD_WAIT_SPIN,
-#endif
- LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */
-
- LD_WAIT_MAX, /* must be last */
-};
-
#ifdef CONFIG_LOCKDEP
#include <linux/linkage.h>
@@ -44,147 +26,6 @@ enum lockdep_wait_type {
#include <linux/debug_locks.h>
#include <linux/stacktrace.h>
-/*
- * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
- * the total number of states... :-(
- */
-#define XXX_LOCK_USAGE_STATES (1+2*4)
-
-/*
- * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
- * cached in the instance of lockdep_map
- *
- * Currently main class (subclass == 0) and signle depth subclass
- * are cached in lockdep_map. This optimization is mainly targeting
- * on rq->lock. double_rq_lock() acquires this highly competitive with
- * single depth.
- */
-#define NR_LOCKDEP_CACHING_CLASSES 2
-
-/*
- * A lockdep key is associated with each lock object. For static locks we use
- * the lock address itself as the key. Dynamically allocated lock objects can
- * have a statically or dynamically allocated key. Dynamically allocated lock
- * keys must be registered before being used and must be unregistered before
- * the key memory is freed.
- */
-struct lockdep_subclass_key {
- char __one_byte;
-} __attribute__ ((__packed__));
-
-/* hash_entry is used to keep track of dynamically allocated keys. */
-struct lock_class_key {
- union {
- struct hlist_node hash_entry;
- struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
- };
-};
-
-extern struct lock_class_key __lockdep_no_validate__;
-
-struct lock_trace;
-
-#define LOCKSTAT_POINTS 4
-
-/*
- * The lock-class itself. The order of the structure members matters.
- * reinit_class() zeroes the key member and all subsequent members.
- */
-struct lock_class {
- /*
- * class-hash:
- */
- struct hlist_node hash_entry;
-
- /*
- * Entry in all_lock_classes when in use. Entry in free_lock_classes
- * when not in use. Instances that are being freed are on one of the
- * zapped_classes lists.
- */
- struct list_head lock_entry;
-
- /*
- * These fields represent a directed graph of lock dependencies,
- * to every node we attach a list of "forward" and a list of
- * "backward" graph nodes.
- */
- struct list_head locks_after, locks_before;
-
- const struct lockdep_subclass_key *key;
- unsigned int subclass;
- unsigned int dep_gen_id;
-
- /*
- * IRQ/softirq usage tracking bits:
- */
- unsigned long usage_mask;
- const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES];
-
- /*
- * Generation counter, when doing certain classes of graph walking,
- * to ensure that we check one node only once:
- */
- int name_version;
- const char *name;
-
- short wait_type_inner;
- short wait_type_outer;
-
-#ifdef CONFIG_LOCK_STAT
- unsigned long contention_point[LOCKSTAT_POINTS];
- unsigned long contending_point[LOCKSTAT_POINTS];
-#endif
-} __no_randomize_layout;
-
-#ifdef CONFIG_LOCK_STAT
-struct lock_time {
- s64 min;
- s64 max;
- s64 total;
- unsigned long nr;
-};
-
-enum bounce_type {
- bounce_acquired_write,
- bounce_acquired_read,
- bounce_contended_write,
- bounce_contended_read,
- nr_bounce_types,
-
- bounce_acquired = bounce_acquired_write,
- bounce_contended = bounce_contended_write,
-};
-
-struct lock_class_stats {
- unsigned long contention_point[LOCKSTAT_POINTS];
- unsigned long contending_point[LOCKSTAT_POINTS];
- struct lock_time read_waittime;
- struct lock_time write_waittime;
- struct lock_time read_holdtime;
- struct lock_time write_holdtime;
- unsigned long bounces[nr_bounce_types];
-};
-
-struct lock_class_stats lock_stats(struct lock_class *class);
-void clear_lock_stats(struct lock_class *class);
-#endif
-
-/*
- * Map the lock object (the lock instance) to the lock-class object.
- * This is embedded into specific lock instances:
- */
-struct lockdep_map {
- struct lock_class_key *key;
- struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
- const char *name;
- short wait_type_outer; /* can be taken in this context */
- short wait_type_inner; /* presents this context */
-#ifdef CONFIG_LOCK_STAT
- int cpu;
- unsigned long ip;
-#endif
-};
-
static inline void lockdep_copy_map(struct lockdep_map *to,
struct lockdep_map *from)
{
@@ -440,8 +281,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock,
extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip);
-struct pin_cookie { unsigned int val; };
-
#define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
@@ -520,10 +359,6 @@ static inline void lockdep_set_selftest_task(struct task_struct *task)
# define lockdep_reset() do { debug_locks = 1; } while (0)
# define lockdep_free_key_range(start, size) do { } while (0)
# define lockdep_sys_exit() do { } while (0)
-/*
- * The class key takes no space if lockdep is disabled:
- */
-struct lock_class_key { };
static inline void lockdep_register_key(struct lock_class_key *key)
{
@@ -533,11 +368,6 @@ static inline void lockdep_unregister_key(struct lock_class_key *key)
{
}
-/*
- * The lockdep_map takes no space if lockdep is disabled:
- */
-struct lockdep_map { };
-
#define lockdep_depth(tsk) (0)
#define lockdep_is_held_type(l, r) (1)
@@ -549,8 +379,6 @@ struct lockdep_map { };
#define lockdep_recursing(tsk) (0)
-struct pin_cookie { };
-
#define NIL_COOKIE (struct pin_cookie){ }
#define lockdep_pin_lock(l) ({ struct pin_cookie cookie = { }; cookie; })
@@ -703,38 +531,58 @@ do { \
lock_release(&(lock)->dep_map, _THIS_IP_); \
} while (0)
-#define lockdep_assert_irqs_enabled() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- !current->hardirqs_enabled, \
- "IRQs not enabled as expected\n"); \
- } while (0)
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);
-#define lockdep_assert_irqs_disabled() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- current->hardirqs_enabled, \
- "IRQs not disabled as expected\n"); \
- } while (0)
+#define lockdep_assert_irqs_enabled() \
+do { \
+ WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \
+} while (0)
-#define lockdep_assert_in_irq() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- !current->hardirq_context, \
- "Not in hardirq as expected\n"); \
- } while (0)
+#define lockdep_assert_irqs_disabled() \
+do { \
+ WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled)); \
+} while (0)
+
+#define lockdep_assert_in_irq() \
+do { \
+ WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context)); \
+} while (0)
+
+#define lockdep_assert_preemption_enabled() \
+do { \
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \
+ debug_locks && \
+ (preempt_count() != 0 || \
+ !this_cpu_read(hardirqs_enabled))); \
+} while (0)
+
+#define lockdep_assert_preemption_disabled() \
+do { \
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \
+ debug_locks && \
+ (preempt_count() == 0 && \
+ this_cpu_read(hardirqs_enabled))); \
+} while (0)
#else
# define might_lock(lock) do { } while (0)
# define might_lock_read(lock) do { } while (0)
# define might_lock_nested(lock, subclass) do { } while (0)
+
# define lockdep_assert_irqs_enabled() do { } while (0)
# define lockdep_assert_irqs_disabled() do { } while (0)
# define lockdep_assert_in_irq() do { } while (0)
+
+# define lockdep_assert_preemption_enabled() do { } while (0)
+# define lockdep_assert_preemption_disabled() do { } while (0)
#endif
#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
# define lockdep_assert_RT_in_threaded_ctx() do { \
WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- current->hardirq_context && \
+ lockdep_hardirq_context() && \
!(current->hardirq_threaded || current->irq_config), \
"Not in threaded context on PREEMPT_RT as expected\n"); \
} while (0)
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
new file mode 100644
index 000000000000..bb35b449f533
--- /dev/null
+++ b/include/linux/lockdep_types.h
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Runtime locking correctness validator
+ *
+ * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
+ *
+ * see Documentation/locking/lockdep-design.rst for more details.
+ */
+#ifndef __LINUX_LOCKDEP_TYPES_H
+#define __LINUX_LOCKDEP_TYPES_H
+
+#include <linux/types.h>
+
+#define MAX_LOCKDEP_SUBCLASSES 8UL
+
+enum lockdep_wait_type {
+ LD_WAIT_INV = 0, /* not checked, catch all */
+
+ LD_WAIT_FREE, /* wait free, rcu etc.. */
+ LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */
+
+#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
+ LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
+#else
+ LD_WAIT_CONFIG = LD_WAIT_SPIN,
+#endif
+ LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */
+
+ LD_WAIT_MAX, /* must be last */
+};
+
+#ifdef CONFIG_LOCKDEP
+
+/*
+ * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
+ * the total number of states... :-(
+ */
+#define XXX_LOCK_USAGE_STATES (1+2*4)
+
+/*
+ * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
+ * cached in the instance of lockdep_map
+ *
+ * Currently main class (subclass == 0) and signle depth subclass
+ * are cached in lockdep_map. This optimization is mainly targeting
+ * on rq->lock. double_rq_lock() acquires this highly competitive with
+ * single depth.
+ */
+#define NR_LOCKDEP_CACHING_CLASSES 2
+
+/*
+ * A lockdep key is associated with each lock object. For static locks we use
+ * the lock address itself as the key. Dynamically allocated lock objects can
+ * have a statically or dynamically allocated key. Dynamically allocated lock
+ * keys must be registered before being used and must be unregistered before
+ * the key memory is freed.
+ */
+struct lockdep_subclass_key {
+ char __one_byte;
+} __attribute__ ((__packed__));
+
+/* hash_entry is used to keep track of dynamically allocated keys. */
+struct lock_class_key {
+ union {
+ struct hlist_node hash_entry;
+ struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
+ };
+};
+
+extern struct lock_class_key __lockdep_no_validate__;
+
+struct lock_trace;
+
+#define LOCKSTAT_POINTS 4
+
+/*
+ * The lock-class itself. The order of the structure members matters.
+ * reinit_class() zeroes the key member and all subsequent members.
+ */
+struct lock_class {
+ /*
+ * class-hash:
+ */
+ struct hlist_node hash_entry;
+
+ /*
+ * Entry in all_lock_classes when in use. Entry in free_lock_classes
+ * when not in use. Instances that are being freed are on one of the
+ * zapped_classes lists.
+ */
+ struct list_head lock_entry;
+
+ /*
+ * These fields represent a directed graph of lock dependencies,
+ * to every node we attach a list of "forward" and a list of
+ * "backward" graph nodes.
+ */
+ struct list_head locks_after, locks_before;
+
+ const struct lockdep_subclass_key *key;
+ unsigned int subclass;
+ unsigned int dep_gen_id;
+
+ /*
+ * IRQ/softirq usage tracking bits:
+ */
+ unsigned long usage_mask;
+ const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES];
+
+ /*
+ * Generation counter, when doing certain classes of graph walking,
+ * to ensure that we check one node only once:
+ */
+ int name_version;
+ const char *name;
+
+ short wait_type_inner;
+ short wait_type_outer;
+
+#ifdef CONFIG_LOCK_STAT
+ unsigned long contention_point[LOCKSTAT_POINTS];
+ unsigned long contending_point[LOCKSTAT_POINTS];
+#endif
+} __no_randomize_layout;
+
+#ifdef CONFIG_LOCK_STAT
+struct lock_time {
+ s64 min;
+ s64 max;
+ s64 total;
+ unsigned long nr;
+};
+
+enum bounce_type {
+ bounce_acquired_write,
+ bounce_acquired_read,
+ bounce_contended_write,
+ bounce_contended_read,
+ nr_bounce_types,
+
+ bounce_acquired = bounce_acquired_write,
+ bounce_contended = bounce_contended_write,
+};
+
+struct lock_class_stats {
+ unsigned long contention_point[LOCKSTAT_POINTS];
+ unsigned long contending_point[LOCKSTAT_POINTS];
+ struct lock_time read_waittime;
+ struct lock_time write_waittime;
+ struct lock_time read_holdtime;
+ struct lock_time write_holdtime;
+ unsigned long bounces[nr_bounce_types];
+};
+
+struct lock_class_stats lock_stats(struct lock_class *class);
+void clear_lock_stats(struct lock_class *class);
+#endif
+
+/*
+ * Map the lock object (the lock instance) to the lock-class object.
+ * This is embedded into specific lock instances:
+ */
+struct lockdep_map {
+ struct lock_class_key *key;
+ struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
+ const char *name;
+ short wait_type_outer; /* can be taken in this context */
+ short wait_type_inner; /* presents this context */
+#ifdef CONFIG_LOCK_STAT
+ int cpu;
+ unsigned long ip;
+#endif
+};
+
+struct pin_cookie { unsigned int val; };
+
+#else /* !CONFIG_LOCKDEP */
+
+/*
+ * The class key takes no space if lockdep is disabled:
+ */
+struct lock_class_key { };
+
+/*
+ * The lockdep_map takes no space if lockdep is disabled:
+ */
+struct lockdep_map { };
+
+struct pin_cookie { };
+
+#endif /* !LOCKDEP */
+
+#endif /* __LINUX_LOCKDEP_TYPES_H */
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 7eed65b5f713..7a6fc9956510 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -248,6 +248,8 @@ static inline void __list_splice_init_rcu(struct list_head *list,
*/
sync();
+ ASSERT_EXCLUSIVE_ACCESS(*first);
+ ASSERT_EXCLUSIVE_ACCESS(*last);
/*
* Readers are finished with the source list, so perform splice.
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 7e5b2a4eb560..25e3fde85617 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -60,39 +60,39 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
}
#define RWSEM_UNLOCKED_VALUE 0L
-#define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
+#define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
/* Common initializer macros and functions */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define __RWSEM_DEP_MAP_INIT(lockname) \
- , .dep_map = { \
+ .dep_map = { \
.name = #lockname, \
.wait_type_inner = LD_WAIT_SLEEP, \
- }
+ },
#else
# define __RWSEM_DEP_MAP_INIT(lockname)
#endif
#ifdef CONFIG_DEBUG_RWSEMS
-# define __DEBUG_RWSEM_INITIALIZER(lockname) , .magic = &lockname
+# define __RWSEM_DEBUG_INIT(lockname) .magic = &lockname,
#else
-# define __DEBUG_RWSEM_INITIALIZER(lockname)
+# define __RWSEM_DEBUG_INIT(lockname)
#endif
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED
+#define __RWSEM_OPT_INIT(lockname) .osq = OSQ_LOCK_UNLOCKED,
#else
#define __RWSEM_OPT_INIT(lockname)
#endif
#define __RWSEM_INITIALIZER(name) \
- { __RWSEM_INIT_COUNT(name), \
+ { __RWSEM_COUNT_INIT(name), \
.owner = ATOMIC_LONG_INIT(0), \
- .wait_list = LIST_HEAD_INIT((name).wait_list), \
- .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \
__RWSEM_OPT_INIT(name) \
- __DEBUG_RWSEM_INITIALIZER(name) \
+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\
+ .wait_list = LIST_HEAD_INIT((name).wait_list), \
+ __RWSEM_DEBUG_INIT(name) \
__RWSEM_DEP_MAP_INIT(name) }
#define DECLARE_RWSEM(name) \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 683372943093..060e9214c8b5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -18,6 +18,7 @@
#include <linux/mutex.h>
#include <linux/plist.h>
#include <linux/hrtimer.h>
+#include <linux/irqflags.h>
#include <linux/seccomp.h>
#include <linux/nodemask.h>
#include <linux/rcupdate.h>
@@ -980,19 +981,9 @@ struct task_struct {
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
- unsigned int irq_events;
+ struct irqtrace_events irqtrace;
unsigned int hardirq_threaded;
- unsigned long hardirq_enable_ip;
- unsigned long hardirq_disable_ip;
- unsigned int hardirq_enable_event;
- unsigned int hardirq_disable_event;
- int hardirqs_enabled;
- int hardirq_context;
u64 hardirq_chain_key;
- unsigned long softirq_disable_ip;
- unsigned long softirq_enable_ip;
- unsigned int softirq_disable_event;
- unsigned int softirq_enable_event;
int softirqs_enabled;
int softirq_context;
int irq_config;
@@ -1193,8 +1184,12 @@ struct task_struct {
#ifdef CONFIG_KASAN
unsigned int kasan_depth;
#endif
+
#ifdef CONFIG_KCSAN
struct kcsan_ctx kcsan_ctx;
+#ifdef CONFIG_TRACE_IRQFLAGS
+ struct irqtrace_events kcsan_save_irqtrace;
+#endif
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 8b97204f35a7..54bc20496392 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -1,36 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_SEQLOCK_H
#define __LINUX_SEQLOCK_H
+
/*
- * Reader/writer consistent mechanism without starving writers. This type of
- * lock for data where the reader wants a consistent set of information
- * and is willing to retry if the information changes. There are two types
- * of readers:
- * 1. Sequence readers which never block a writer but they may have to retry
- * if a writer is in progress by detecting change in sequence number.
- * Writers do not wait for a sequence reader.
- * 2. Locking readers which will wait if a writer or another locking reader
- * is in progress. A locking reader in progress will also block a writer
- * from going forward. Unlike the regular rwlock, the read lock here is
- * exclusive so that only one locking reader can get it.
- *
- * This is not as cache friendly as brlock. Also, this may not work well
- * for data that contains pointers, because any writer could
- * invalidate a pointer that a reader was following.
- *
- * Expected non-blocking reader usage:
- * do {
- * seq = read_seqbegin(&foo);
- * ...
- * } while (read_seqretry(&foo, seq));
- *
+ * seqcount_t / seqlock_t - a reader-writer consistency mechanism with
+ * lockless readers (read-only retry loops), and no writer starvation.
*
- * On non-SMP the spin locks disappear but the writer still needs
- * to increment the sequence variables because an interrupt routine could
- * change the state of the data.
+ * See Documentation/locking/seqlock.rst
*
- * Based on x86_64 vsyscall gettimeofday
- * by Keith Owens and Andrea Arcangeli
+ * Copyrights:
+ * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
*/
#include <linux/spinlock.h>
@@ -41,8 +20,8 @@
#include <asm/processor.h>
/*
- * The seqlock interface does not prescribe a precise sequence of read
- * begin/retry/end. For readers, typically there is a call to
+ * The seqlock seqcount_t interface does not prescribe a precise sequence of
+ * read begin/retry/end. For readers, typically there is a call to
* read_seqcount_begin() and read_seqcount_retry(), however, there are more
* esoteric cases which do not follow this pattern.
*
@@ -50,16 +29,30 @@
* via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
* pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
* atomics; if there is a matching read_seqcount_retry() call, no following
- * memory operations are considered atomic. Usage of seqlocks via seqlock_t
- * interface is not affected.
+ * memory operations are considered atomic. Usage of the seqlock_t interface
+ * is not affected.
*/
#define KCSAN_SEQLOCK_REGION_MAX 1000
/*
- * Version using sequence counter only.
- * This can be used when code has its own mutex protecting the
- * updating starting before the write_seqcountbeqin() and ending
- * after the write_seqcount_end().
+ * Sequence counters (seqcount_t)
+ *
+ * This is the raw counting mechanism, without any writer protection.
+ *
+ * Write side critical sections must be serialized and non-preemptible.
+ *
+ * If readers can be invoked from hardirq or softirq contexts,
+ * interrupts or bottom halves must also be respectively disabled before
+ * entering the write section.
+ *
+ * This mechanism can't be used if the protected data contains pointers,
+ * as the writer can invalidate a pointer that a reader is following.
+ *
+ * If it's desired to automatically handle the sequence counter writer
+ * serialization and non-preemptibility requirements, use a sequential
+ * lock (seqlock_t) instead.
+ *
+ * See Documentation/locking/seqlock.rst
*/
typedef struct seqcount {
unsigned sequence;
@@ -82,6 +75,10 @@ static inline void __seqcount_init(seqcount_t *s, const char *name,
# define SEQCOUNT_DEP_MAP_INIT(lockname) \
.dep_map = { .name = #lockname } \
+/**
+ * seqcount_init() - runtime initializer for seqcount_t
+ * @s: Pointer to the seqcount_t instance
+ */
# define seqcount_init(s) \
do { \
static struct lock_class_key __key; \
@@ -105,13 +102,15 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
# define seqcount_lockdep_reader_access(x)
#endif
-#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)}
-
+/**
+ * SEQCNT_ZERO() - static initializer for seqcount_t
+ * @name: Name of the seqcount_t instance
+ */
+#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }
/**
- * __read_seqcount_begin - begin a seq-read critical section (without barrier)
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
+ * @s: Pointer to seqcount_t
*
* __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
* barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -120,6 +119,8 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
*
* Use carefully, only in critical code, and comment how the barrier is
* provided.
+ *
+ * Return: count to be passed to read_seqcount_retry()
*/
static inline unsigned __read_seqcount_begin(const seqcount_t *s)
{
@@ -136,30 +137,10 @@ repeat:
}
/**
- * raw_read_seqcount - Read the raw seqcount
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
+ * @s: Pointer to seqcount_t
*
- * raw_read_seqcount opens a read critical section of the given
- * seqcount without any lockdep checking and without checking or
- * masking the LSB. Calling code is responsible for handling that.
- */
-static inline unsigned raw_read_seqcount(const seqcount_t *s)
-{
- unsigned ret = READ_ONCE(s->sequence);
- smp_rmb();
- kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
- return ret;
-}
-
-/**
- * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
- *
- * raw_read_seqcount_begin opens a read critical section of the given
- * seqcount, but without any lockdep checking. Validity of the critical
- * section is tested by checking read_seqcount_retry function.
+ * Return: count to be passed to read_seqcount_retry()
*/
static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
{
@@ -169,13 +150,10 @@ static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
}
/**
- * read_seqcount_begin - begin a seq-read critical section
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * read_seqcount_begin() - begin a seqcount_t read critical section
+ * @s: Pointer to seqcount_t
*
- * read_seqcount_begin opens a read critical section of the given seqcount.
- * Validity of the critical section is tested by checking read_seqcount_retry
- * function.
+ * Return: count to be passed to read_seqcount_retry()
*/
static inline unsigned read_seqcount_begin(const seqcount_t *s)
{
@@ -184,32 +162,54 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s)
}
/**
- * raw_seqcount_begin - begin a seq-read critical section
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * raw_read_seqcount() - read the raw seqcount_t counter value
+ * @s: Pointer to seqcount_t
*
- * raw_seqcount_begin opens a read critical section of the given seqcount.
- * Validity of the critical section is tested by checking read_seqcount_retry
- * function.
+ * raw_read_seqcount opens a read critical section of the given
+ * seqcount_t, without any lockdep checking, and without checking or
+ * masking the sequence counter LSB. Calling code is responsible for
+ * handling that.
*
- * Unlike read_seqcount_begin(), this function will not wait for the count
- * to stabilize. If a writer is active when we begin, we will fail the
- * read_seqcount_retry() instead of stabilizing at the beginning of the
- * critical section.
+ * Return: count to be passed to read_seqcount_retry()
*/
-static inline unsigned raw_seqcount_begin(const seqcount_t *s)
+static inline unsigned raw_read_seqcount(const seqcount_t *s)
{
unsigned ret = READ_ONCE(s->sequence);
smp_rmb();
kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
- return ret & ~1;
+ return ret;
}
/**
- * __read_seqcount_retry - end a seq-read critical section (without barrier)
- * @s: pointer to seqcount_t
- * @start: count, from read_seqcount_begin
- * Returns: 1 if retry is required, else 0
+ * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
+ * lockdep and w/o counter stabilization
+ * @s: Pointer to seqcount_t
+ *
+ * raw_seqcount_begin opens a read critical section of the given
+ * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
+ * for the count to stabilize. If a writer is active when it begins, it
+ * will fail the read_seqcount_retry() at the end of the read critical
+ * section instead of stabilizing at the beginning of it.
+ *
+ * Use this only in special kernel hot paths where the read section is
+ * small and has a high probability of success through other external
+ * means. It will save a single branching instruction.
+ *
+ * Return: count to be passed to read_seqcount_retry()
+ */
+static inline unsigned raw_seqcount_begin(const seqcount_t *s)
+{
+ /*
+ * If the counter is odd, let read_seqcount_retry() fail
+ * by decrementing the counter.
+ */
+ return raw_read_seqcount(s) & ~1;
+}
+
+/**
+ * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
+ * @s: Pointer to seqcount_t
+ * @start: count, from read_seqcount_begin()
*
* __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
* barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -218,6 +218,8 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s)
*
* Use carefully, only in critical code, and comment how the barrier is
* provided.
+ *
+ * Return: true if a read section retry is required, else false
*/
static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
{
@@ -226,14 +228,15 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
}
/**
- * read_seqcount_retry - end a seq-read critical section
- * @s: pointer to seqcount_t
- * @start: count, from read_seqcount_begin
- * Returns: 1 if retry is required, else 0
+ * read_seqcount_retry() - end a seqcount_t read critical section
+ * @s: Pointer to seqcount_t
+ * @start: count, from read_seqcount_begin()
*
- * read_seqcount_retry closes a read critical section of the given seqcount.
- * If the critical section was invalid, it must be ignored (and typically
- * retried).
+ * read_seqcount_retry closes the read critical section of given
+ * seqcount_t. If the critical section was invalid, it must be ignored
+ * (and typically retried).
+ *
+ * Return: true if a read section retry is required, else false
*/
static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
{
@@ -241,8 +244,10 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
return __read_seqcount_retry(s, start);
}
-
-
+/**
+ * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
+ * @s: Pointer to seqcount_t
+ */
static inline void raw_write_seqcount_begin(seqcount_t *s)
{
kcsan_nestable_atomic_begin();
@@ -250,6 +255,10 @@ static inline void raw_write_seqcount_begin(seqcount_t *s)
smp_wmb();
}
+/**
+ * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
+ * @s: Pointer to seqcount_t
+ */
static inline void raw_write_seqcount_end(seqcount_t *s)
{
smp_wmb();
@@ -257,45 +266,104 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
kcsan_nestable_atomic_end();
}
+static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass)
+{
+ raw_write_seqcount_begin(s);
+ seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
+}
+
/**
- * raw_write_seqcount_barrier - do a seq write barrier
- * @s: pointer to seqcount_t
+ * write_seqcount_begin_nested() - start a seqcount_t write section with
+ * custom lockdep nesting level
+ * @s: Pointer to seqcount_t
+ * @subclass: lockdep nesting level
*
- * This can be used to provide an ordering guarantee instead of the
- * usual consistency guarantee. It is one wmb cheaper, because we can
- * collapse the two back-to-back wmb()s.
+ * See Documentation/locking/lockdep-design.rst
+ */
+static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
+{
+ lockdep_assert_preemption_disabled();
+ __write_seqcount_begin_nested(s, subclass);
+}
+
+/*
+ * A write_seqcount_begin() variant w/o lockdep non-preemptibility checks.
+ *
+ * Use for internal seqlock.h code where it's known that preemption is
+ * already disabled. For example, seqlock_t write side functions.
+ */
+static inline void __write_seqcount_begin(seqcount_t *s)
+{
+ __write_seqcount_begin_nested(s, 0);
+}
+
+/**
+ * write_seqcount_begin() - start a seqcount_t write side critical section
+ * @s: Pointer to seqcount_t
+ *
+ * write_seqcount_begin opens a write side critical section of the given
+ * seqcount_t.
+ *
+ * Context: seqcount_t write side critical sections must be serialized and
+ * non-preemptible. If readers can be invoked from hardirq or softirq
+ * context, interrupts or bottom halves must be respectively disabled.
+ */
+static inline void write_seqcount_begin(seqcount_t *s)
+{
+ write_seqcount_begin_nested(s, 0);
+}
+
+/**
+ * write_seqcount_end() - end a seqcount_t write side critical section
+ * @s: Pointer to seqcount_t
+ *
+ * The write section must've been opened with write_seqcount_begin().
+ */
+static inline void write_seqcount_end(seqcount_t *s)
+{
+ seqcount_release(&s->dep_map, _RET_IP_);
+ raw_write_seqcount_end(s);
+}
+
+/**
+ * raw_write_seqcount_barrier() - do a seqcount_t write barrier
+ * @s: Pointer to seqcount_t
+ *
+ * This can be used to provide an ordering guarantee instead of the usual
+ * consistency guarantee. It is one wmb cheaper, because it can collapse
+ * the two back-to-back wmb()s.
*
* Note that writes surrounding the barrier should be declared atomic (e.g.
* via WRITE_ONCE): a) to ensure the writes become visible to other threads
* atomically, avoiding compiler optimizations; b) to document which writes are
* meant to propagate to the reader critical section. This is necessary because
* neither writes before and after the barrier are enclosed in a seq-writer
- * critical section that would ensure readers are aware of ongoing writes.
+ * critical section that would ensure readers are aware of ongoing writes::
*
- * seqcount_t seq;
- * bool X = true, Y = false;
+ * seqcount_t seq;
+ * bool X = true, Y = false;
*
- * void read(void)
- * {
- * bool x, y;
+ * void read(void)
+ * {
+ * bool x, y;
*
- * do {
- * int s = read_seqcount_begin(&seq);
+ * do {
+ * int s = read_seqcount_begin(&seq);
*
- * x = X; y = Y;
+ * x = X; y = Y;
*
- * } while (read_seqcount_retry(&seq, s));
+ * } while (read_seqcount_retry(&seq, s));
*
- * BUG_ON(!x && !y);
+ * BUG_ON(!x && !y);
* }
*
* void write(void)
* {
- * WRITE_ONCE(Y, true);
+ * WRITE_ONCE(Y, true);
*
- * raw_write_seqcount_barrier(seq);
+ * raw_write_seqcount_barrier(seq);
*
- * WRITE_ONCE(X, false);
+ * WRITE_ONCE(X, false);
* }
*/
static inline void raw_write_seqcount_barrier(seqcount_t *s)
@@ -307,6 +375,37 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s)
kcsan_nestable_atomic_end();
}
+/**
+ * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
+ * side operations
+ * @s: Pointer to seqcount_t
+ *
+ * After write_seqcount_invalidate, no seqcount_t read side operations
+ * will complete successfully and see data older than this.
+ */
+static inline void write_seqcount_invalidate(seqcount_t *s)
+{
+ smp_wmb();
+ kcsan_nestable_atomic_begin();
+ s->sequence+=2;
+ kcsan_nestable_atomic_end();
+}
+
+/**
+ * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy
+ * @s: Pointer to seqcount_t
+ *
+ * Use seqcount_t latching to switch between two storage places protected
+ * by a sequence counter. Doing so allows having interruptible, preemptible,
+ * seqcount_t write side critical sections.
+ *
+ * Check raw_write_seqcount_latch() for more details and a full reader and
+ * writer usage example.
+ *
+ * Return: sequence counter raw value. Use the lowest bit as an index for
+ * picking which data copy to read. The full counter value must then be
+ * checked with read_seqcount_retry().
+ */
static inline int raw_read_seqcount_latch(seqcount_t *s)
{
/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
@@ -315,8 +414,8 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
}
/**
- * raw_write_seqcount_latch - redirect readers to even/odd copy
- * @s: pointer to seqcount_t
+ * raw_write_seqcount_latch() - redirect readers to even/odd copy
+ * @s: Pointer to seqcount_t
*
* The latch technique is a multiversion concurrency control method that allows
* queries during non-atomic modifications. If you can guarantee queries never
@@ -332,64 +431,68 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
* Very simply put: we first modify one copy and then the other. This ensures
* there is always one copy in a stable state, ready to give us an answer.
*
- * The basic form is a data structure like:
+ * The basic form is a data structure like::
*
- * struct latch_struct {
- * seqcount_t seq;
- * struct data_struct data[2];
- * };
+ * struct latch_struct {
+ * seqcount_t seq;
+ * struct data_struct data[2];
+ * };
*
* Where a modification, which is assumed to be externally serialized, does the
- * following:
+ * following::
*
- * void latch_modify(struct latch_struct *latch, ...)
- * {
- * smp_wmb(); <- Ensure that the last data[1] update is visible
- * latch->seq++;
- * smp_wmb(); <- Ensure that the seqcount update is visible
+ * void latch_modify(struct latch_struct *latch, ...)
+ * {
+ * smp_wmb(); // Ensure that the last data[1] update is visible
+ * latch->seq++;
+ * smp_wmb(); // Ensure that the seqcount update is visible
*
- * modify(latch->data[0], ...);
+ * modify(latch->data[0], ...);
*
- * smp_wmb(); <- Ensure that the data[0] update is visible
- * latch->seq++;
- * smp_wmb(); <- Ensure that the seqcount update is visible
+ * smp_wmb(); // Ensure that the data[0] update is visible
+ * latch->seq++;
+ * smp_wmb(); // Ensure that the seqcount update is visible
*
- * modify(latch->data[1], ...);
- * }
+ * modify(latch->data[1], ...);
+ * }
*
- * The query will have a form like:
+ * The query will have a form like::
*
- * struct entry *latch_query(struct latch_struct *latch, ...)
- * {
- * struct entry *entry;
- * unsigned seq, idx;
+ * struct entry *latch_query(struct latch_struct *latch, ...)
+ * {
+ * struct entry *entry;
+ * unsigned seq, idx;
*
- * do {
- * seq = raw_read_seqcount_latch(&latch->seq);
+ * do {
+ * seq = raw_read_seqcount_latch(&latch->seq);
*
- * idx = seq & 0x01;
- * entry = data_query(latch->data[idx], ...);
+ * idx = seq & 0x01;
+ * entry = data_query(latch->data[idx], ...);
*
- * smp_rmb();
- * } while (seq != latch->seq);
+ * // read_seqcount_retry() includes needed smp_rmb()
+ * } while (read_seqcount_retry(&latch->seq, seq));
*
- * return entry;
- * }
+ * return entry;
+ * }
*
* So during the modification, queries are first redirected to data[1]. Then we
* modify data[0]. When that is complete, we redirect queries back to data[0]
* and we can modify data[1].
*
- * NOTE: The non-requirement for atomic modifications does _NOT_ include
- * the publishing of new entries in the case where data is a dynamic
- * data structure.
+ * NOTE:
*
- * An iteration might start in data[0] and get suspended long enough
- * to miss an entire modification sequence, once it resumes it might
- * observe the new entry.
+ * The non-requirement for atomic modifications does _NOT_ include
+ * the publishing of new entries in the case where data is a dynamic
+ * data structure.
*
- * NOTE: When data is a dynamic data structure; one should use regular RCU
- * patterns to manage the lifetimes of the objects within.
+ * An iteration might start in data[0] and get suspended long enough
+ * to miss an entire modification sequence, once it resumes it might
+ * observe the new entry.
+ *
+ * NOTE:
+ *
+ * When data is a dynamic data structure; one should use regular RCU
+ * patterns to manage the lifetimes of the objects within.
*/
static inline void raw_write_seqcount_latch(seqcount_t *s)
{
@@ -399,67 +502,48 @@ static inline void raw_write_seqcount_latch(seqcount_t *s)
}
/*
- * Sequence counter only version assumes that callers are using their
- * own mutexing.
- */
-static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
-{
- raw_write_seqcount_begin(s);
- seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
-}
-
-static inline void write_seqcount_begin(seqcount_t *s)
-{
- write_seqcount_begin_nested(s, 0);
-}
-
-static inline void write_seqcount_end(seqcount_t *s)
-{
- seqcount_release(&s->dep_map, _RET_IP_);
- raw_write_seqcount_end(s);
-}
-
-/**
- * write_seqcount_invalidate - invalidate in-progress read-side seq operations
- * @s: pointer to seqcount_t
+ * Sequential locks (seqlock_t)
*
- * After write_seqcount_invalidate, no read-side seq operations will complete
- * successfully and see data older than this.
+ * Sequence counters with an embedded spinlock for writer serialization
+ * and non-preemptibility.
+ *
+ * For more info, see:
+ * - Comments on top of seqcount_t
+ * - Documentation/locking/seqlock.rst
*/
-static inline void write_seqcount_invalidate(seqcount_t *s)
-{
- smp_wmb();
- kcsan_nestable_atomic_begin();
- s->sequence+=2;
- kcsan_nestable_atomic_end();
-}
-
typedef struct {
struct seqcount seqcount;
spinlock_t lock;
} seqlock_t;
-/*
- * These macros triggered gcc-3.x compile-time problems. We think these are
- * OK now. Be cautious.
- */
#define __SEQLOCK_UNLOCKED(lockname) \
{ \
.seqcount = SEQCNT_ZERO(lockname), \
.lock = __SPIN_LOCK_UNLOCKED(lockname) \
}
-#define seqlock_init(x) \
+/**
+ * seqlock_init() - dynamic initializer for seqlock_t
+ * @sl: Pointer to the seqlock_t instance
+ */
+#define seqlock_init(sl) \
do { \
- seqcount_init(&(x)->seqcount); \
- spin_lock_init(&(x)->lock); \
+ seqcount_init(&(sl)->seqcount); \
+ spin_lock_init(&(sl)->lock); \
} while (0)
-#define DEFINE_SEQLOCK(x) \
- seqlock_t x = __SEQLOCK_UNLOCKED(x)
+/**
+ * DEFINE_SEQLOCK() - Define a statically allocated seqlock_t
+ * @sl: Name of the seqlock_t instance
+ */
+#define DEFINE_SEQLOCK(sl) \
+ seqlock_t sl = __SEQLOCK_UNLOCKED(sl)
-/*
- * Read side functions for starting and finalizing a read side section.
+/**
+ * read_seqbegin() - start a seqlock_t read side critical section
+ * @sl: Pointer to seqlock_t
+ *
+ * Return: count, to be passed to read_seqretry()
*/
static inline unsigned read_seqbegin(const seqlock_t *sl)
{
@@ -470,6 +554,17 @@ static inline unsigned read_seqbegin(const seqlock_t *sl)
return ret;
}
+/**
+ * read_seqretry() - end a seqlock_t read side section
+ * @sl: Pointer to seqlock_t
+ * @start: count, from read_seqbegin()
+ *
+ * read_seqretry closes the read side critical section of given seqlock_t.
+ * If the critical section was invalid, it must be ignored (and typically
+ * retried).
+ *
+ * Return: true if a read section retry is required, else false
+ */
static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
{
/*
@@ -481,41 +576,85 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
return read_seqcount_retry(&sl->seqcount, start);
}
-/*
- * Lock out other writers and update the count.
- * Acts like a normal spin_lock/unlock.
- * Don't need preempt_disable() because that is in the spin_lock already.
+/**
+ * write_seqlock() - start a seqlock_t write side critical section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_seqlock opens a write side critical section for the given
+ * seqlock_t. It also implicitly acquires the spinlock_t embedded inside
+ * that sequential lock. All seqlock_t write side sections are thus
+ * automatically serialized and non-preemptible.
+ *
+ * Context: if the seqlock_t read section, or other write side critical
+ * sections, can be invoked from hardirq or softirq contexts, use the
+ * _irqsave or _bh variants of this function instead.
*/
static inline void write_seqlock(seqlock_t *sl)
{
spin_lock(&sl->lock);
- write_seqcount_begin(&sl->seqcount);
+ __write_seqcount_begin(&sl->seqcount);
}
+/**
+ * write_sequnlock() - end a seqlock_t write side critical section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_sequnlock closes the (serialized and non-preemptible) write side
+ * critical section of given seqlock_t.
+ */
static inline void write_sequnlock(seqlock_t *sl)
{
write_seqcount_end(&sl->seqcount);
spin_unlock(&sl->lock);
}
+/**
+ * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * _bh variant of write_seqlock(). Use only if the read side section, or
+ * other write side sections, can be invoked from softirq contexts.
+ */
static inline void write_seqlock_bh(seqlock_t *sl)
{
spin_lock_bh(&sl->lock);
- write_seqcount_begin(&sl->seqcount);
+ __write_seqcount_begin(&sl->seqcount);
}
+/**
+ * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_sequnlock_bh closes the serialized, non-preemptible, and
+ * softirqs-disabled, seqlock_t write side critical section opened with
+ * write_seqlock_bh().
+ */
static inline void write_sequnlock_bh(seqlock_t *sl)
{
write_seqcount_end(&sl->seqcount);
spin_unlock_bh(&sl->lock);
}
+/**
+ * write_seqlock_irq() - start a non-interruptible seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * _irq variant of write_seqlock(). Use only if the read side section, or
+ * other write sections, can be invoked from hardirq contexts.
+ */
static inline void write_seqlock_irq(seqlock_t *sl)
{
spin_lock_irq(&sl->lock);
- write_seqcount_begin(&sl->seqcount);
+ __write_seqcount_begin(&sl->seqcount);
}
+/**
+ * write_sequnlock_irq() - end a non-interruptible seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_sequnlock_irq closes the serialized and non-interruptible
+ * seqlock_t write side section opened with write_seqlock_irq().
+ */
static inline void write_sequnlock_irq(seqlock_t *sl)
{
write_seqcount_end(&sl->seqcount);
@@ -527,13 +666,32 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
unsigned long flags;
spin_lock_irqsave(&sl->lock, flags);
- write_seqcount_begin(&sl->seqcount);
+ __write_seqcount_begin(&sl->seqcount);
return flags;
}
+/**
+ * write_seqlock_irqsave() - start a non-interruptible seqlock_t write
+ * section
+ * @lock: Pointer to seqlock_t
+ * @flags: Stack-allocated storage for saving caller's local interrupt
+ * state, to be passed to write_sequnlock_irqrestore().
+ *
+ * _irqsave variant of write_seqlock(). Use it only if the read side
+ * section, or other write sections, can be invoked from hardirq context.
+ */
#define write_seqlock_irqsave(lock, flags) \
do { flags = __write_seqlock_irqsave(lock); } while (0)
+/**
+ * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write
+ * section
+ * @sl: Pointer to seqlock_t
+ * @flags: Caller's saved interrupt state, from write_seqlock_irqsave()
+ *
+ * write_sequnlock_irqrestore closes the serialized and non-interruptible
+ * seqlock_t write section previously opened with write_seqlock_irqsave().
+ */
static inline void
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
{
@@ -541,65 +699,79 @@ write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
spin_unlock_irqrestore(&sl->lock, flags);
}
-/*
- * A locking reader exclusively locks out other writers and locking readers,
- * but doesn't update the sequence number. Acts like a normal spin_lock/unlock.
- * Don't need preempt_disable() because that is in the spin_lock already.
+/**
+ * read_seqlock_excl() - begin a seqlock_t locking reader section
+ * @sl: Pointer to seqlock_t
+ *
+ * read_seqlock_excl opens a seqlock_t locking reader critical section. A
+ * locking reader exclusively locks out *both* other writers *and* other
+ * locking readers, but it does not update the embedded sequence number.
+ *
+ * Locking readers act like a normal spin_lock()/spin_unlock().
+ *
+ * Context: if the seqlock_t write section, *or other read sections*, can
+ * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
+ * variant of this function instead.
+ *
+ * The opened read section must be closed with read_sequnlock_excl().
*/
static inline void read_seqlock_excl(seqlock_t *sl)
{
spin_lock(&sl->lock);
}
+/**
+ * read_sequnlock_excl() - end a seqlock_t locking reader critical section
+ * @sl: Pointer to seqlock_t
+ */
static inline void read_sequnlock_excl(seqlock_t *sl)
{
spin_unlock(&sl->lock);
}
/**
- * read_seqbegin_or_lock - begin a sequence number check or locking block
- * @lock: sequence lock
- * @seq : sequence number to be checked
+ * read_seqlock_excl_bh() - start a seqlock_t locking reader section with
+ * softirqs disabled
+ * @sl: Pointer to seqlock_t
*
- * First try it once optimistically without taking the lock. If that fails,
- * take the lock. The sequence number is also used as a marker for deciding
- * whether to be a reader (even) or writer (odd).
- * N.B. seq must be initialized to an even number to begin with.
+ * _bh variant of read_seqlock_excl(). Use this variant only if the
+ * seqlock_t write side section, *or other read sections*, can be invoked
+ * from softirq contexts.
*/
-static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
-{
- if (!(*seq & 1)) /* Even */
- *seq = read_seqbegin(lock);
- else /* Odd */
- read_seqlock_excl(lock);
-}
-
-static inline int need_seqretry(seqlock_t *lock, int seq)
-{
- return !(seq & 1) && read_seqretry(lock, seq);
-}
-
-static inline void done_seqretry(seqlock_t *lock, int seq)
-{
- if (seq & 1)
- read_sequnlock_excl(lock);
-}
-
static inline void read_seqlock_excl_bh(seqlock_t *sl)
{
spin_lock_bh(&sl->lock);
}
+/**
+ * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking
+ * reader section
+ * @sl: Pointer to seqlock_t
+ */
static inline void read_sequnlock_excl_bh(seqlock_t *sl)
{
spin_unlock_bh(&sl->lock);
}
+/**
+ * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking
+ * reader section
+ * @sl: Pointer to seqlock_t
+ *
+ * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t
+ * write side section, *or other read sections*, can be invoked from a
+ * hardirq context.
+ */
static inline void read_seqlock_excl_irq(seqlock_t *sl)
{
spin_lock_irq(&sl->lock);
}
+/**
+ * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t
+ * locking reader section
+ * @sl: Pointer to seqlock_t
+ */
static inline void read_sequnlock_excl_irq(seqlock_t *sl)
{
spin_unlock_irq(&sl->lock);
@@ -613,15 +785,117 @@ static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
return flags;
}
+/**
+ * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t
+ * locking reader section
+ * @lock: Pointer to seqlock_t
+ * @flags: Stack-allocated storage for saving caller's local interrupt
+ * state, to be passed to read_sequnlock_excl_irqrestore().
+ *
+ * _irqsave variant of read_seqlock_excl(). Use this only if the seqlock_t
+ * write side section, *or other read sections*, can be invoked from a
+ * hardirq context.
+ */
#define read_seqlock_excl_irqsave(lock, flags) \
do { flags = __read_seqlock_excl_irqsave(lock); } while (0)
+/**
+ * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t
+ * locking reader section
+ * @sl: Pointer to seqlock_t
+ * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave()
+ */
static inline void
read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
{
spin_unlock_irqrestore(&sl->lock, flags);
}
+/**
+ * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader
+ * @lock: Pointer to seqlock_t
+ * @seq : Marker and return parameter. If the passed value is even, the
+ * reader will become a *lockless* seqlock_t reader as in read_seqbegin().
+ * If the passed value is odd, the reader will become a *locking* reader
+ * as in read_seqlock_excl(). In the first call to this function, the
+ * caller *must* initialize and pass an even value to @seq; this way, a
+ * lockless read can be optimistically tried first.
+ *
+ * read_seqbegin_or_lock is an API designed to optimistically try a normal
+ * lockless seqlock_t read section first. If an odd counter is found, the
+ * lockless read trial has failed, and the next read iteration transforms
+ * itself into a full seqlock_t locking reader.
+ *
+ * This is typically used to avoid seqlock_t lockless readers starvation
+ * (too much retry loops) in the case of a sharp spike in write side
+ * activity.
+ *
+ * Context: if the seqlock_t write section, *or other read sections*, can
+ * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
+ * variant of this function instead.
+ *
+ * Check Documentation/locking/seqlock.rst for template example code.
+ *
+ * Return: the encountered sequence counter value, through the @seq
+ * parameter, which is overloaded as a return parameter. This returned
+ * value must be checked with need_seqretry(). If the read section need to
+ * be retried, this returned value must also be passed as the @seq
+ * parameter of the next read_seqbegin_or_lock() iteration.
+ */
+static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
+{
+ if (!(*seq & 1)) /* Even */
+ *seq = read_seqbegin(lock);
+ else /* Odd */
+ read_seqlock_excl(lock);
+}
+
+/**
+ * need_seqretry() - validate seqlock_t "locking or lockless" read section
+ * @lock: Pointer to seqlock_t
+ * @seq: sequence count, from read_seqbegin_or_lock()
+ *
+ * Return: true if a read section retry is required, false otherwise
+ */
+static inline int need_seqretry(seqlock_t *lock, int seq)
+{
+ return !(seq & 1) && read_seqretry(lock, seq);
+}
+
+/**
+ * done_seqretry() - end seqlock_t "locking or lockless" reader section
+ * @lock: Pointer to seqlock_t
+ * @seq: count, from read_seqbegin_or_lock()
+ *
+ * done_seqretry finishes the seqlock_t read side critical section started
+ * with read_seqbegin_or_lock() and validated by need_seqretry().
+ */
+static inline void done_seqretry(seqlock_t *lock, int seq)
+{
+ if (seq & 1)
+ read_sequnlock_excl(lock);
+}
+
+/**
+ * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or
+ * a non-interruptible locking reader
+ * @lock: Pointer to seqlock_t
+ * @seq: Marker and return parameter. Check read_seqbegin_or_lock().
+ *
+ * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if
+ * the seqlock_t write section, *or other read sections*, can be invoked
+ * from hardirq context.
+ *
+ * Note: Interrupts will be disabled only for "locking reader" mode.
+ *
+ * Return:
+ *
+ * 1. The saved local interrupts state in case of a locking reader, to
+ * be passed to done_seqretry_irqrestore().
+ *
+ * 2. The encountered sequence counter value, returned through @seq
+ * overloaded as a return parameter. Check read_seqbegin_or_lock().
+ */
static inline unsigned long
read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
{
@@ -635,6 +909,18 @@ read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
return flags;
}
+/**
+ * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a
+ * non-interruptible locking reader section
+ * @lock: Pointer to seqlock_t
+ * @seq: Count, from read_seqbegin_or_lock_irqsave()
+ * @flags: Caller's saved local interrupt state in case of a locking
+ * reader, also from read_seqbegin_or_lock_irqsave()
+ *
+ * This is the _irqrestore variant of done_seqretry(). The read section
+ * must've been opened with read_seqbegin_or_lock_irqsave(), and validated
+ * by need_seqretry().
+ */
static inline void
done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
{
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index d3770b3f9d9a..f2f12d746dbd 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -56,6 +56,7 @@
#include <linux/kernel.h>
#include <linux/stringify.h>
#include <linux/bottom_half.h>
+#include <linux/lockdep.h>
#include <asm/barrier.h>
#include <asm/mmiowb.h>
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 6102e6bff3ae..b981caafe8bf 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -15,7 +15,7 @@
# include <linux/spinlock_types_up.h>
#endif
-#include <linux/lockdep.h>
+#include <linux/lockdep_types.h>
typedef struct raw_spinlock {
arch_spinlock_t raw_lock;
diff --git a/include/linux/types.h b/include/linux/types.h
index d3021c879179..a147977602b5 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -167,6 +167,8 @@ typedef struct {
int counter;
} atomic_t;
+#define ATOMIC_INIT(i) { (i) }
+
#ifdef CONFIG_64BIT
typedef struct {
s64 counter;