summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2023-02-21 10:45:51 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2023-02-21 10:45:51 -0800
commit 8cc01d43f882fa1f44d8aa6727a6ea783d8fbe3f (patch)
tree 053ed1940a0ddb7ff2972c05637edf820772cbb8 /include
parent 8ca8d89b43caf9a02a18414d6eeff966d2b14512 (diff)
parent bba8d3d17dc2678f9647962900aa421a18c25320 (diff)
Merge tag 'rcu.2023.02.10a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu
Pull RCU updates from Paul McKenney:

 - Documentation updates

 - Miscellaneous fixes, perhaps most notably:

    - Throttling callback invocation based on the number of callbacks
      that are now ready to invoke instead of on the total number of
      callbacks

    - Several patches that suppress false-positive boot-time
      diagnostics, for example, due to lockdep not yet being
      initialized

    - Make expedited RCU CPU stall warnings dump stacks of any tasks
      that are blocking the stalled grace period. (Normal RCU CPU
      stall warnings have done this for many years)

 - Lazy-callback fixes to avoid delays during boot, suspend, and
   resume. (Note that lazy callbacks must be explicitly enabled, so
   this should not (yet) affect production use cases)

 - Make kfree_rcu() and friends take advantage of polled grace periods,
   thus reducing memory footprint by almost two orders of magnitude,
   admittedly on a microbenchmark

   This also begins the transition from kfree_rcu(p) to
   kfree_rcu_mightsleep(p). This transition was motivated by bugs where
   kfree_rcu(p), which can block, was typed instead of the intended
   kfree_rcu(p, rh)

 - SRCU updates, perhaps most notably fixing a bug that causes SRCU to
   fail when booted on a system with a non-zero boot CPU. This
   surprising situation actually happens for kdump kernels on the
   powerpc architecture

   This also adds an srcu_down_read() and srcu_up_read(), which act
   like srcu_read_lock() and srcu_read_unlock(), but allow an SRCU
   read-side critical section to be handed off from one task to another

 - Clean up the now-useless SRCU Kconfig option

   There are a few more commits that are not yet acked or pulled into
   maintainer trees, and these will be in a pull request for a later
   merge window

 - RCU-tasks updates, perhaps most notably these fixes:

    - A strange interaction between PID-namespace unshare and the
      RCU-tasks grace period that results in a low-probability but
      very real hang

    - A race between an RCU tasks rude grace period on a single-CPU
      system and CPU-hotplug addition of the second CPU that can
      result in a too-short grace period

    - A race between shrinking RCU tasks down to a single callback
      list and queuing a new callback to some other CPU, but where
      that queuing is delayed for more than an RCU grace period. This
      can result in that callback being stranded on the non-boot CPU

 - Torture-test updates and fixes

 - Torture-test scripting updates and fixes

 - Provide additional RCU CPU stall-warning information in kernels
   built with CONFIG_RCU_CPU_STALL_CPUTIME=y, and restore the full
   five-minute timeout limit for expedited RCU CPU stall warnings

* tag 'rcu.2023.02.10a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu: (80 commits)
  rcu/kvfree: Add kvfree_rcu_mightsleep() and kfree_rcu_mightsleep()
  kernel/notifier: Remove CONFIG_SRCU
  init: Remove "select SRCU"
  fs/quota: Remove "select SRCU"
  fs/notify: Remove "select SRCU"
  fs/btrfs: Remove "select SRCU"
  fs: Remove CONFIG_SRCU
  drivers/pci/controller: Remove "select SRCU"
  drivers/net: Remove "select SRCU"
  drivers/md: Remove "select SRCU"
  drivers/hwtracing/stm: Remove "select SRCU"
  drivers/dax: Remove "select SRCU"
  drivers/base: Remove CONFIG_SRCU
  rcu: Disable laziness if lazy-tracking says so
  rcu: Track laziness during boot and suspend
  rcu: Remove redundant call to rcu_boost_kthread_setaffinity()
  rcu: Allow up to five minutes expedited RCU CPU stall-warning timeouts
  rcu: Align the output of RCU CPU stall warning messages
  rcu: Add RCU stall diagnosis information
  sched: Add helper nr_context_switches_cpu()
  ...
Diffstat (limited to 'include')
-rw-r--r-- include/linux/kernel_stat.h 14
-rw-r--r-- include/linux/rculist_nulls.h 2
-rw-r--r-- include/linux/rcupdate.h 19
-rw-r--r-- include/linux/rcutiny.h 12
-rw-r--r-- include/linux/rcutree.h 2
-rw-r--r-- include/linux/srcu.h 45
-rw-r--r-- include/linux/srcutree.h 2
7 files changed, 82 insertions, 14 deletions
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index ddb5a358fd82..9935f7ecbfb9 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -52,6 +52,7 @@ DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
#define kstat_cpu(cpu) per_cpu(kstat, cpu)
#define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu)
+extern unsigned long long nr_context_switches_cpu(int cpu);
extern unsigned long long nr_context_switches(void);
extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
@@ -67,6 +68,17 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu)
return kstat_cpu(cpu).softirqs[irq];
}
+static inline unsigned int kstat_cpu_softirqs_sum(int cpu)
+{
+ int i;
+ unsigned int sum = 0;
+
+ for (i = 0; i < NR_SOFTIRQS; i++)
+ sum += kstat_softirqs_cpu(i, cpu);
+
+ return sum;
+}
+
/*
* Number of interrupts per specific IRQ source, since bootup
*/
@@ -75,7 +87,7 @@ extern unsigned int kstat_irqs_usr(unsigned int irq);
/*
* Number of interrupts per cpu, since bootup
*/
-static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
+static inline unsigned long kstat_cpu_irqs_sum(unsigned int cpu)
{
return kstat_cpu(cpu).irqs_sum;
}
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index d8afdb8784c1..ba4c00dd8005 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -139,7 +139,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
if (last) {
n->next = last->next;
n->pprev = &last->next;
- rcu_assign_pointer(hlist_next_rcu(last), n);
+ rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
} else {
hlist_nulls_add_head_rcu(n, h);
}
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 03abf883a281..094321c17e48 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -238,6 +238,7 @@ void synchronize_rcu_tasks_rude(void);
#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false)
void exit_tasks_rcu_start(void);
+void exit_tasks_rcu_stop(void);
void exit_tasks_rcu_finish(void);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
#define rcu_tasks_classic_qs(t, preempt) do { } while (0)
@@ -246,6 +247,7 @@ void exit_tasks_rcu_finish(void);
#define call_rcu_tasks call_rcu
#define synchronize_rcu_tasks synchronize_rcu
static inline void exit_tasks_rcu_start(void) { }
+static inline void exit_tasks_rcu_stop(void) { }
static inline void exit_tasks_rcu_finish(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
@@ -374,11 +376,18 @@ static inline int debug_lockdep_rcu_enabled(void)
* RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met
* @c: condition to check
* @s: informative message
+ *
+ * This checks debug_lockdep_rcu_enabled() before checking (c) to
+ * prevent early boot splats due to lockdep not yet being initialized,
+ * and rechecks it after checking (c) to prevent false-positive splats
+ * due to races with lockdep being disabled. See commit 3066820034b5dd
+ * ("rcu: Reject RCU_LOCKDEP_WARN() false positives") for more detail.
*/
#define RCU_LOCKDEP_WARN(c, s) \
do { \
static bool __section(".data.unlikely") __warned; \
- if ((c) && debug_lockdep_rcu_enabled() && !__warned) { \
+ if (debug_lockdep_rcu_enabled() && (c) && \
+ debug_lockdep_rcu_enabled() && !__warned) { \
__warned = true; \
lockdep_rcu_suspicious(__FILE__, __LINE__, s); \
} \
@@ -1004,6 +1013,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
#define kvfree_rcu(...) KVFREE_GET_MACRO(__VA_ARGS__, \
kvfree_rcu_arg_2, kvfree_rcu_arg_1)(__VA_ARGS__)
+#define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
+#define kfree_rcu_mightsleep(ptr) kvfree_rcu_mightsleep(ptr)
+
#define KVFREE_GET_MACRO(_1, _2, NAME, ...) NAME
#define kvfree_rcu_arg_2(ptr, rhf) \
do { \
@@ -1011,8 +1023,7 @@ do { \
\
if (___p) { \
BUILD_BUG_ON(!__is_kvfree_rcu_offset(offsetof(typeof(*(ptr)), rhf))); \
- kvfree_call_rcu(&((___p)->rhf), (rcu_callback_t)(unsigned long) \
- (offsetof(typeof(*(ptr)), rhf))); \
+ kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \
} \
} while (0)
@@ -1021,7 +1032,7 @@ do { \
typeof(ptr) ___p = (ptr); \
\
if (___p) \
- kvfree_call_rcu(NULL, (rcu_callback_t) (___p)); \
+ kvfree_call_rcu(NULL, (void *) (___p)); \
} while (0)
/*
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 68f9070aa111..7f17acf29dda 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -98,25 +98,25 @@ static inline void synchronize_rcu_expedited(void)
*/
extern void kvfree(const void *addr);
-static inline void __kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
+static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
if (head) {
- call_rcu(head, func);
+ call_rcu(head, (rcu_callback_t) ((void *) head - ptr));
return;
}
// kvfree_rcu(one_arg) call.
might_sleep();
synchronize_rcu();
- kvfree((void *) func);
+ kvfree(ptr);
}
#ifdef CONFIG_KASAN_GENERIC
-void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
+void kvfree_call_rcu(struct rcu_head *head, void *ptr);
#else
-static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
+static inline void kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
- __kvfree_call_rcu(head, func);
+ __kvfree_call_rcu(head, ptr);
}
#endif
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 4003bf6cfa1c..56bccb5a8fde 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -33,7 +33,7 @@ static inline void rcu_virt_note_context_switch(void)
}
void synchronize_rcu_expedited(void);
-void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
+void kvfree_call_rcu(struct rcu_head *head, void *ptr);
void rcu_barrier(void);
bool rcu_eqs_special_set(int cpu);
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 9b9d0bbf1d3c..74796cd7e7a9 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -215,6 +215,34 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
}
/**
+ * srcu_down_read - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter a semaphore-like SRCU read-side critical section. Note that
+ * SRCU read-side critical sections may be nested. However, it is
+ * illegal to call anything that waits on an SRCU grace period for the
+ * same srcu_struct, whether directly or indirectly. Please note that
+ * one way to indirectly wait on an SRCU grace period is to acquire
+ * a mutex that is held elsewhere while calling synchronize_srcu() or
+ * synchronize_srcu_expedited(). But if you want lockdep to help you
+ * keep this stuff straight, you should instead use srcu_read_lock().
+ *
+ * The semaphore-like nature of srcu_down_read() means that the matching
+ * srcu_up_read() can be invoked from some other context, for example,
+ * from some other task or from an irq handler. However, neither
+ * srcu_down_read() nor srcu_up_read() may be invoked from an NMI handler.
+ *
+ * Calls to srcu_down_read() may be nested, similar to the manner in
+ * which calls to down_read() may be nested.
+ */
+static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp)
+{
+ WARN_ON_ONCE(in_nmi());
+ srcu_check_nmi_safety(ssp, false);
+ return __srcu_read_lock(ssp);
+}
+
+/**
* srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
* @idx: return value from corresponding srcu_read_lock().
@@ -255,6 +283,23 @@ srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp)
}
/**
+ * srcu_up_read - unregister an old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_down_read().
+ *
+ * Exit an SRCU read-side critical section, but not necessarily from
+ * the same context as the matching srcu_down_read().
+ */
+static inline void srcu_up_read(struct srcu_struct *ssp, int idx)
+ __releases(ssp)
+{
+ WARN_ON_ONCE(idx & ~0x1);
+ WARN_ON_ONCE(in_nmi());
+ srcu_check_nmi_safety(ssp, false);
+ __srcu_read_unlock(ssp, idx);
+}
+
+/**
* smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
*
* Converts the preceding srcu_read_unlock into a two-way memory barrier.
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index c689a81752c9..558057b517b7 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -49,7 +49,7 @@ struct srcu_data {
struct srcu_node {
spinlock_t __private lock;
unsigned long srcu_have_cbs[4]; /* GP seq for children having CBs, but only */
- /* if greater than ->srcu_gq_seq. */
+ /* if greater than ->srcu_gp_seq. */
unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs have CBs for given GP? */
unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */
struct srcu_node *srcu_parent; /* Next up in tree. */