From 9cf422a8e71455032b61c6c0ea56a1e96206aab0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 20 Nov 2018 10:43:34 -0800 Subject: rcu: Accommodate zero jiffies_till_first_fqs and kthread kicking It is perfectly fine to set the rcutree.jiffies_till_first_fqs boot parameter to zero; in fact, this can be useful on specialty systems that usually have at least one idle CPU and that need fast grace periods. This is because this setting causes the RCU grace-period kthread to scan for idle CPUs immediately after grace-period initialization, as opposed to waiting several jiffies to do so. It is also perfectly fine to set the rcutree.rcu_kick_kthreads kernel parameter, which gives the RCU grace-period kthread an extra wakeup if it doesn't make progress for a period of three times the setting of the rcutree.jiffies_till_first_fqs boot parameter. This is of course problematic when the value of this parameter is zero, as it can result in unnecessary wakeup IPIs along with unnecessary WARN_ONCE() invocations. This commit therefore defers kthread kicking for at least two jiffies, regardless of the setting of rcutree.jiffies_till_first_fqs. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9180158756d2..b003a3cfe192 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1939,7 +1939,7 @@ static void rcu_gp_fqs_loop(void) if (!ret) { rcu_state.jiffies_force_qs = jiffies + j; WRITE_ONCE(rcu_state.jiffies_kick_kthreads, - jiffies + 3 * j); + jiffies + (j ? 3 * j : 2)); } trace_rcu_grace_period(rcu_state.name, READ_ONCE(rcu_state.gp_seq), -- cgit From 37f62d7cf00c085e1d7a91a6af286c4e8d32e1e1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 30 Nov 2018 16:11:14 -0800 Subject: rcu: Move rcu_cpu_kthread_task to rcu_data structure Given that RCU has a perfectly good per-CPU rcu_data structure, most per-CPU quantities should be stored there. This commit therefore moves the rcu_cpu_kthread_task per-CPU variable to the rcu_data structure. This also makes this variable unconditionally present, which should be acceptable given the memory reduction due to the RCU flavor consolidation and also due to simplifications this will enable. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 6 +++++- kernel/rcu/tree_plugin.h | 11 +++++------ 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index d90b02b53c0e..ef517ba25192 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -234,7 +234,11 @@ struct rcu_data { /* Leader CPU takes GP-end wakeups. */ #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ - /* 6) Diagnostic data, including RCU CPU stall warnings. */ + /* 6) RCU priority boosting. */ + struct task_struct *rcu_cpu_kthread_task; + /* rcuc per-CPU kthread or NULL. */ + + /* 7) Diagnostic data, including RCU CPU stall warnings. */ unsigned int softirq_snap; /* Snapshot of softirq activity. */ /* ->rcu_iw* fields protected by leaf rcu_node ->lock. */ struct irq_work rcu_iw; /* Check for non-irq activity. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 1b3dd2fc0cd6..359bf1f6f8e0 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -40,7 +40,6 @@ /* * Control variables for per-CPU and per-rcu_node kthreads.
*/ -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DEFINE_PER_CPU(char, rcu_cpu_has_work); @@ -1308,9 +1307,9 @@ static void invoke_rcu_callbacks_kthread(void) local_irq_save(flags); __this_cpu_write(rcu_cpu_has_work, 1); - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && - current != __this_cpu_read(rcu_cpu_kthread_task)) { - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), + if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL && + current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) { + rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task), __this_cpu_read(rcu_cpu_kthread_status)); } local_irq_restore(flags); @@ -1322,7 +1321,7 @@ static void invoke_rcu_callbacks_kthread(void) */ static bool rcu_is_callbacks_kthread(void) { - return __this_cpu_read(rcu_cpu_kthread_task) == current; + return __this_cpu_read(rcu_data.rcu_cpu_kthread_task) == current; } #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) @@ -1459,7 +1458,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) } static struct smp_hotplug_thread rcu_cpu_thread_spec = { - .store = &rcu_cpu_kthread_task, + .store = &rcu_data.rcu_cpu_kthread_task, .thread_should_run = rcu_cpu_kthread_should_run, .thread_fn = rcu_cpu_kthread, .thread_comm = "rcuc/%u", -- cgit From 6ffdde28b7558ec48f4e0eee01821a66a67a8e25 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 30 Nov 2018 16:43:05 -0800 Subject: rcu: Move rcu_cpu_kthread_status to rcu_data structure Given that RCU has a perfectly good per-CPU rcu_data structure, most per-CPU quantities should be stored there. This commit therefore moves the rcu_cpu_kthread_status per-CPU variable to the rcu_data structure. This also makes this variable unconditionally present, which should be acceptable given the memory reduction due to the RCU flavor consolidation and also due to simplifications this will enable. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 3 ++- kernel/rcu/tree_plugin.h | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index ef517ba25192..047f5e9350d1 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -237,6 +237,8 @@ struct rcu_data { /* 6) RCU priority boosting. */ struct task_struct *rcu_cpu_kthread_task; /* rcuc per-CPU kthread or NULL. */ + unsigned int rcu_cpu_kthread_status; + /* Running status for rcuc. */ /* 7) Diagnostic data, including RCU CPU stall warnings. */ unsigned int softirq_snap; /* Snapshot of softirq activity. */ @@ -407,7 +409,6 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name; int rcu_dynticks_snap(struct rcu_data *rdp); #ifdef CONFIG_RCU_BOOST -DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DECLARE_PER_CPU(char, rcu_cpu_has_work); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 359bf1f6f8e0..935dc594cf30 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -40,7 +40,6 @@ /* * Control variables for per-CPU and per-rcu_node kthreads. 
*/ -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DEFINE_PER_CPU(char, rcu_cpu_has_work); @@ -1310,7 +1309,7 @@ static void invoke_rcu_callbacks_kthread(void) if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL && current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) { rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task), - __this_cpu_read(rcu_cpu_kthread_status)); + __this_cpu_read(rcu_data.rcu_cpu_kthread_status)); } local_irq_restore(flags); } @@ -1383,7 +1382,7 @@ static void rcu_cpu_kthread_setup(unsigned int cpu) static void rcu_cpu_kthread_park(unsigned int cpu) { - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; + per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; } static int rcu_cpu_kthread_should_run(unsigned int cpu) @@ -1398,7 +1397,7 @@ static int rcu_cpu_kthread_should_run(unsigned int cpu) */ static void rcu_cpu_kthread(unsigned int cpu) { - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); + unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status); char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); int spincnt; -- cgit From 8b4d0f4858864af6a0753740f00353035bd058de Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 30 Nov 2018 17:17:21 -0800 Subject: rcu: Remove unused rcu_cpu_kthread_loops per-CPU variable The rcu_cpu_kthread_loops variable used to provide debugfs information, but is no longer used. This commit therefore removes it. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 1 - kernel/rcu/tree_plugin.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 047f5e9350d1..e50b0a5a94bc 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -410,7 +410,6 @@ int rcu_dynticks_snap(struct rcu_data *rdp); #ifdef CONFIG_RCU_BOOST DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); -DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DECLARE_PER_CPU(char, rcu_cpu_has_work); #endif /* #ifdef CONFIG_RCU_BOOST */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 935dc594cf30..d1f32c63c789 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -40,7 +40,6 @@ /* * Control variables for per-CPU and per-rcu_node kthreads. */ -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DEFINE_PER_CPU(char, rcu_cpu_has_work); #else /* #ifdef CONFIG_RCU_BOOST */ @@ -1405,7 +1404,6 @@ static void rcu_cpu_kthread(unsigned int cpu) trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); local_bh_disable(); *statusp = RCU_KTHREAD_RUNNING; - this_cpu_inc(rcu_cpu_kthread_loops); local_irq_disable(); work = *workp; *workp = 0; -- cgit From f7e972ee128e0a65784b13ec1459fe35b817eed7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 30 Nov 2018 18:21:32 -0800 Subject: rcu: Move rcu_cpu_has_work to rcu_data structure Given that RCU has a perfectly good per-CPU rcu_data structure, most per-CPU quantities should be stored there. This commit therefore moves the rcu_cpu_has_work per-CPU variable to the rcu_data structure. This also makes this variable unconditionally present, which should be acceptable given the memory reduction due to the RCU flavor consolidation and also due to simplifications this will enable. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.h | 3 +-- kernel/rcu/tree_plugin.h | 15 ++++----------- 2 files changed, 5 insertions(+), 13 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index e50b0a5a94bc..7ae6774a920e 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -238,7 +238,7 @@ struct rcu_data { struct task_struct *rcu_cpu_kthread_task; /* rcuc per-CPU kthread or NULL. */ unsigned int rcu_cpu_kthread_status; - /* Running status for rcuc. */ + char rcu_cpu_has_work; /* 7) Diagnostic data, including RCU CPU stall warnings. */ unsigned int softirq_snap; /* Snapshot of softirq activity. */ @@ -410,7 +410,6 @@ int rcu_dynticks_snap(struct rcu_data *rdp); #ifdef CONFIG_RCU_BOOST DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); -DECLARE_PER_CPU(char, rcu_cpu_has_work); #endif /* #ifdef CONFIG_RCU_BOOST */ /* Forward declarations for rcutree_plugin.h */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index d1f32c63c789..b241c4b20549 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -34,14 +34,7 @@ #include "../time/tick-internal.h" #ifdef CONFIG_RCU_BOOST - #include "../locking/rtmutex_common.h" - -/* - * Control variables for per-CPU and per-rcu_node kthreads. - */ -DEFINE_PER_CPU(char, rcu_cpu_has_work); - #else /* #ifdef CONFIG_RCU_BOOST */ /* @@ -1304,7 +1297,7 @@ static void invoke_rcu_callbacks_kthread(void) unsigned long flags; local_irq_save(flags); - __this_cpu_write(rcu_cpu_has_work, 1); + __this_cpu_write(rcu_data.rcu_cpu_has_work, 1); if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL && current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) { rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task), @@ -1386,7 +1379,7 @@ static void rcu_cpu_kthread_park(unsigned int cpu) static int rcu_cpu_kthread_should_run(unsigned int cpu) { - return __this_cpu_read(rcu_cpu_has_work); + return __this_cpu_read(rcu_data.rcu_cpu_has_work); } /* @@ -1397,7 +1390,7 @@ static int rcu_cpu_kthread_should_run(unsigned int cpu) static void rcu_cpu_kthread(unsigned int cpu) { unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status); - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); + char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work); int spincnt; for (spincnt = 0; spincnt < 10; spincnt++) { @@ -1472,7 +1465,7 @@ static void __init rcu_spawn_boost_kthreads(void) int cpu; for_each_possible_cpu(cpu) - per_cpu(rcu_cpu_has_work, cpu) = 0; + per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0; if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__)) return; rcu_for_each_leaf_node(rnp) -- cgit From b2c1955b88495c1531b2080ba4fad119c0a03cc1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 30 Nov 2018 19:12:04 -0800 Subject: rcu: Remove unused rcu_cpu_kthread_cpu per-CPU variable The rcu_cpu_kthread_cpu variable used to provide debugfs information, but is no longer used. This commit therefore removes it. Signed-off-by: Paul E.
McKenney --- kernel/rcu/tree.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 7ae6774a920e..008c356c7033 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -408,10 +408,6 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name; int rcu_dynticks_snap(struct rcu_data *rdp); -#ifdef CONFIG_RCU_BOOST -DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); -#endif /* #ifdef CONFIG_RCU_BOOST */ - /* Forward declarations for rcutree_plugin.h */ static void rcu_bootup_announce(void); static void rcu_qs(void); -- cgit From a9fefdb257259ac2f0f5fcd916edecc5c9427635 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 3 Dec 2018 14:07:17 -0800 Subject: rcu: Update NOCB comments This commit updates a few obsolete comments in the RCU callback-offload code. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index b241c4b20549..f0019c2a2cbc 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1857,22 +1857,24 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp) /* * Offload callback processing from the boot-time-specified set of CPUs - * specified by rcu_nocb_mask. For each CPU in the set, there is a - * kthread created that pulls the callbacks from the corresponding CPU, - * waits for a grace period to elapse, and invokes the callbacks. - * The no-CBs CPUs do a wake_up() on their kthread when they insert - * a callback into any empty list, unless the rcu_nocb_poll boot parameter - * has been specified, in which case each kthread actively polls its - * CPU. (Which isn't so great for energy efficiency, but which does - * reduce RCU's overhead on that CPU.) + * specified by rcu_nocb_mask. For the CPUs in the set, there are kthreads + * created that pull the callbacks from the corresponding CPU, wait for + * a grace period to elapse, and invoke the callbacks. These kthreads + * are organized into leaders, which manage incoming callbacks, wait for + * grace periods, and awaken followers, and the followers, which only + * invoke callbacks. Each leader is its own follower. The no-CBs CPUs + * do a wake_up() on their kthread when they insert a callback into any + * empty list, unless the rcu_nocb_poll boot parameter has been specified, + * in which case each kthread actively polls its CPU. (Which isn't so great + * for energy efficiency, but which does reduce RCU's overhead on that CPU.) * * This is intended to be used in conjunction with Frederic Weisbecker's * adaptive-idle work, which would seriously reduce OS jitter on CPUs * running CPU-bound user-mode computations. * - * Offloading of callback processing could also in theory be used as - * an energy-efficiency measure because CPUs with no RCU callbacks - * queued are more aggressive about entering dyntick-idle mode. + * Offloading of callbacks can also be used as an energy-efficiency + * measure because CPUs with no RCU callbacks queued are more aggressive + * about entering dyntick-idle mode. */ @@ -1976,10 +1978,7 @@ static void wake_nocb_leader_defer(struct rcu_data *rdp, int waketype, raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); } -/* - * Does the specified CPU need an RCU callback for this invocation - * of rcu_barrier()? - */ +/* Does rcu_barrier need to queue an RCU callback on the specified CPU? 
*/ static bool rcu_nocb_cpu_needs_barrier(int cpu) { struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); @@ -1995,8 +1994,8 @@ static bool rcu_nocb_cpu_needs_barrier(int cpu) * callbacks would be posted. In the worst case, the first * barrier in rcu_barrier() suffices (but the caller cannot * necessarily rely on this, not a substitute for the caller - * getting the concurrency design right!). There must also be - * a barrier between the following load an posting of a callback + * getting the concurrency design right!). There must also be a + * barrier between the following load and posting of a callback * (if a callback is in fact needed). This is associated with an * atomic_inc() in the caller. */ -- cgit From fd897573fa4cfe66ebddf5f4444f36710cf0cad0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 10 Dec 2018 16:09:49 -0800 Subject: rcu: Improve diagnostics for failed RCU grace-period start If a grace period fails to start (for example, because you commented out the last two lines of rcu_accelerate_cbs_unlocked()), rcu_core() will invoke rcu_check_gp_start_stall(), which will notice and complain. However, this complaint is lacking crucial debugging information such as when the last wakeup executed and what the value of ->gp_seq was at that time. This commit therefore removes the current pr_alert() from rcu_check_gp_start_stall(), instead invoking show_rcu_gp_kthreads(), which has been updated to print the needed information, which is collected by rcu_gp_kthread_wake(). Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 55 ++++++++++++++++++++++++++++++++----------------------- kernel/rcu/tree.h | 2 ++ 2 files changed, 34 insertions(+), 23 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b003a3cfe192..8543a90d53f2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -512,6 +512,14 @@ void rcu_force_quiescent_state(void) } EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); +/* + * Return the root node of the rcu_state structure. + */ +static struct rcu_node *rcu_get_root(void) +{ + return &rcu_state.node[0]; +} + /* * Convert a ->gp_state value to a character string. */ @@ -529,19 +537,30 @@ void show_rcu_gp_kthreads(void) { int cpu; unsigned long j; + unsigned long ja; + unsigned long jr; + unsigned long jw; struct rcu_data *rdp; struct rcu_node *rnp; - j = jiffies - READ_ONCE(rcu_state.gp_activity); - pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %ld\n", + j = jiffies; + ja = j - READ_ONCE(rcu_state.gp_activity); + jr = j - READ_ONCE(rcu_state.gp_req_activity); + jw = j - READ_ONCE(rcu_state.gp_wake_time); + pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n", rcu_state.name, gp_state_getname(rcu_state.gp_state), - rcu_state.gp_state, rcu_state.gp_kthread->state, j); + rcu_state.gp_state, + rcu_state.gp_kthread ? 
rcu_state.gp_kthread->state : 0x1ffffL, + ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq), + (long)READ_ONCE(rcu_state.gp_seq), + (long)READ_ONCE(rcu_get_root()->gp_seq_needed), + READ_ONCE(rcu_state.gp_flags)); rcu_for_each_node_breadth_first(rnp) { if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed)) continue; - pr_info("\trcu_node %d:%d ->gp_seq %lu ->gp_seq_needed %lu\n", - rnp->grplo, rnp->grphi, rnp->gp_seq, - rnp->gp_seq_needed); + pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n", + rnp->grplo, rnp->grphi, (long)rnp->gp_seq, + (long)rnp->gp_seq_needed); if (!rcu_is_leaf_node(rnp)) continue; for_each_leaf_node_possible_cpu(rnp, cpu) { @@ -550,8 +569,8 @@ void show_rcu_gp_kthreads(void) ULONG_CMP_GE(rcu_state.gp_seq, rdp->gp_seq_needed)) continue; - pr_info("\tcpu %d ->gp_seq_needed %lu\n", - cpu, rdp->gp_seq_needed); + pr_info("\tcpu %d ->gp_seq_needed %ld\n", + cpu, (long)rdp->gp_seq_needed); } } /* sched_show_task(rcu_state.gp_kthread); */ @@ -577,14 +596,6 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, } EXPORT_SYMBOL_GPL(rcutorture_get_gp_data); -/* - * Return the root node of the rcu_state structure. - */ -static struct rcu_node *rcu_get_root(void) -{ - return &rcu_state.node[0]; -} - /* * Enter an RCU extended quiescent state, which can be either the * idle loop or adaptive-tickless usermode execution. @@ -1560,7 +1571,8 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp) * Awaken the grace-period kthread. Don't do a self-awaken, and don't * bother awakening when there is nothing for the grace-period kthread * to do (as in several CPUs raced to awaken, and we lost), and finally - * don't try to awaken a kthread that has not yet been created. + * don't try to awaken a kthread that has not yet been created. If + * all those checks are passed, track some debug information and awaken. */ static void rcu_gp_kthread_wake(void) { @@ -1568,6 +1580,8 @@ static void rcu_gp_kthread_wake(void) !READ_ONCE(rcu_state.gp_flags) || !rcu_state.gp_kthread) return; + WRITE_ONCE(rcu_state.gp_wake_time, jiffies); + WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq)); swake_up_one(&rcu_state.gp_wq); } @@ -2657,16 +2671,11 @@ rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp, raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return; } - pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x gs:%d %s->state:%#lx\n", - __func__, (long)READ_ONCE(rcu_state.gp_seq), - (long)READ_ONCE(rnp_root->gp_seq_needed), - j - rcu_state.gp_req_activity, j - rcu_state.gp_activity, - rcu_state.gp_flags, rcu_state.gp_state, rcu_state.name, - rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL); WARN_ON(1); if (rnp_root != rnp) raw_spin_unlock_rcu_node(rnp_root); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + show_rcu_gp_kthreads(); } /* diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 008c356c7033..1f2ada7ef7d7 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -309,6 +309,8 @@ struct rcu_state { struct swait_queue_head gp_wq; /* Where GP task waits. */ short gp_flags; /* Commands for GP task. */ short gp_state; /* GP kthread sleep state. */ + unsigned long gp_wake_time; /* Last GP kthread wake. */ + unsigned long gp_wake_seq; /* ->gp_seq at ^^^. */ /* End of fields guarded by root rcu_node's lock. */ -- cgit From 3b6505fd8eb86e3ef5ce12b34fe81e9edeb84475 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 12 Dec 2018 07:20:07 -0800 Subject: rcu: Protect rcu_check_gp_kthread_starvation() access to ->gp_flags The rcu_check_gp_kthread_starvation() function can be invoked without holding locks, so the access to the rcu_state structure's ->gp_flags field must be protected with READ_ONCE(). This commit therefore adds this protection. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8543a90d53f2..238150684fed 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1192,7 +1192,7 @@ static void rcu_check_gp_kthread_starvation(void) pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", rcu_state.name, j, (long)rcu_seq_current(&rcu_state.gp_seq), - rcu_state.gp_flags, + READ_ONCE(rcu_state.gp_flags), gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1); if (gpk) { -- cgit From 2ccaff10f71307ad4f75ce673b815d3fe6254e3d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Dec 2018 12:32:06 -0800 Subject: rcu: Add sysrq rcu_node-dump capability Life is hard if RCU manages to get stuck without triggering RCU CPU stall warnings or triggering the rcu_check_gp_start_stall() checks for failing to start a grace period. This commit therefore adds a boot-time-selectable sysrq key (commandeering "y") that allows manually dumping Tree RCU state. The new rcutree.sysrq_rcu kernel boot parameter must be set for this sysrq to be available. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 238150684fed..9ceb93f848cd 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -62,6 +62,7 @@ #include #include #include +#include <linux/sysrq.h> #include "tree.h" #include "rcu.h" @@ -115,6 +116,9 @@ int num_rcu_lvl[] = NUM_RCU_LVL_INIT; int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ /* panic() on RCU Stall sysctl. */ int sysctl_panic_on_rcu_stall __read_mostly; +/* Commandeer a sysrq key to dump RCU's tree. */ +static bool sysrq_rcu; +module_param(sysrq_rcu, bool, 0444); /* * The rcu_scheduler_active variable is initialized to the value @@ -577,6 +581,27 @@ void show_rcu_gp_kthreads(void) } EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads); +/* Dump grace-period-request information due to commandeered sysrq. */ +static void sysrq_show_rcu(int key) +{ + show_rcu_gp_kthreads(); +} + +static struct sysrq_key_op sysrq_rcudump_op = { + .handler = sysrq_show_rcu, + .help_msg = "show-rcu(y)", + .action_msg = "Show RCU tree", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; + +static int __init rcu_sysrq_init(void) +{ + if (sysrq_rcu) + return register_sysrq_key('y', &sysrq_rcudump_op); + return 0; +} +early_initcall(rcu_sysrq_init); /* * Send along grace-period-related data for rcutorture diagnostics. */ -- cgit From 1d1f898df6586c5ea9aeaf349f13089c6fa37903 Mon Sep 17 00:00:00 2001 From: "Zhang, Jun" Date: Tue, 18 Dec 2018 06:55:01 -0800 Subject: rcu: Do RCU GP kthread self-wakeup from softirq and interrupt The rcu_gp_kthread_wake() function is invoked when it might be necessary to wake the RCU grace-period kthread. Because self-wakeups are normally a useless waste of CPU cycles, if rcu_gp_kthread_wake() is invoked from this kthread, it naturally refuses to do the wakeup.
Unfortunately, natural though it might be, this heuristic fails when rcu_gp_kthread_wake() is invoked from an interrupt or softirq handler that interrupted the grace-period kthread just after the final check of the wait-event condition but just before the schedule() call. In this case, a wakeup is required, even though the call to rcu_gp_kthread_wake() is within the RCU grace-period kthread's context. Failing to provide this wakeup can result in grace periods failing to start, which in turn results in out-of-memory conditions. This race window is quite narrow, but it actually did happen during real testing. It would of course need to be fixed even if it were strictly theoretical in nature. This patch does not Cc stable because it does not apply cleanly to earlier kernel versions. Fixes: 48a7639ce80c ("rcu: Make callers awaken grace-period kthread") Reported-by: "He, Bo" Co-developed-by: "Zhang, Jun" Co-developed-by: "He, Bo" Co-developed-by: "xiao, jin" Co-developed-by: Bai, Jie A Signed-off-by: "Zhang, Jun" Signed-off-by: "He, Bo" Signed-off-by: "xiao, jin" Signed-off-by: Bai, Jie A Signed-off-by: "Zhang, Jun" [ paulmck: Switch from !in_softirq() to "!in_interrupt() && !in_serving_softirq()" to avoid redundant wakeups and to also handle the interrupt-handler scenario as well as the softirq-handler scenario that actually occurred in testing. ] Signed-off-by: Paul E. McKenney Link: https://lkml.kernel.org/r/CD6925E8781EFD4D8E11882D20FC406D52A11F61@SHSMSX104.ccr.corp.intel.com --- kernel/rcu/tree.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9ceb93f848cd..21775eebb8f0 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1593,15 +1593,23 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp) } /* - * Awaken the grace-period kthread. Don't do a self-awaken, and don't - * bother awakening when there is nothing for the grace-period kthread - * to do (as in several CPUs raced to awaken, and we lost), and finally - * don't try to awaken a kthread that has not yet been created. If - * all those checks are passed, track some debug information and awaken. + * Awaken the grace-period kthread. Don't do a self-awaken (unless in + * an interrupt or softirq handler), and don't bother awakening when there + * is nothing for the grace-period kthread to do (as in several CPUs raced + * to awaken, and we lost), and finally don't try to awaken a kthread that + * has not yet been created. If all those checks are passed, track some + * debug information and awaken. + * + * So why do the self-wakeup when in an interrupt or softirq handler + * in the grace-period kthread's context? Because the kthread might have + * been interrupted just as it was going to sleep, and just after the final + * pre-sleep check of the awaken condition. In this case, a wakeup really + * is required, and is therefore supplied.
*/ static void rcu_gp_kthread_wake(void) { - if (current == rcu_state.gp_kthread || + if ((current == rcu_state.gp_kthread && + !in_interrupt() && !in_serving_softirq()) || !READ_ONCE(rcu_state.gp_flags) || !rcu_state.gp_kthread) return; -- cgit From 13dc7d0c7a2ed438f0ec8e9fb365a1256d87cf87 Mon Sep 17 00:00:00 2001 From: "Zhang, Jun" Date: Wed, 19 Dec 2018 10:37:34 -0800 Subject: rcu: Prevent needless ->gp_seq_needed update in __note_gp_changes() Currently, __note_gp_changes() checks to see if the rcu_node structure's ->gp_seq_needed is greater than or equal to that of the rcu_data structure, and if so, updates the rcu_data structure's ->gp_seq_needed field. This results in a useless store in the case where the two fields are equal. This commit therefore carries out this store only in the case where the rcu_node structure's ->gp_seq_needed is strictly greater than that of the rcu_data structure. Signed-off-by: "Zhang, Jun" Signed-off-by: Paul E. McKenney Link: https://lkml.kernel.org/r/88DC34334CA3444C85D647DBFA962C2735AD5F77@SHSMSX104.ccr.corp.intel.com --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 21775eebb8f0..9d0e2ac9356e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1758,7 +1758,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp) zero_cpu_stall_ticks(rdp); } rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */ - if (ULONG_CMP_GE(rnp->gp_seq_needed, rdp->gp_seq_needed) || rdp->gpwrap) + if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap) rdp->gp_seq_needed = rnp->gp_seq_needed; WRITE_ONCE(rdp->gpwrap, false); rcu_gpnum_ovf(rnp, rdp); -- cgit
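A note on the sequence-number comparisons in the final patch above: ULONG_CMP_GE() and ULONG_CMP_LT() compare unsigned long grace-period sequence numbers in a wraparound-safe way by interpreting the unsigned difference as a signed distance, which is what lets the patch swap a greater-or-equal check for a strictly-less-than check without worrying about ->gp_seq wrap. The standalone userspace sketch below demonstrates the point; the macro bodies follow the kernel's definitions in kernel/rcu/rcu.h, but the surrounding demo harness is illustrative only, not kernel code.

#include <limits.h>
#include <stdio.h>

/* Wraparound-safe comparisons, following kernel/rcu/rcu.h. */
#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))

int main(void)
{
	unsigned long old = ULONG_MAX - 1;	/* Counter about to wrap. */
	unsigned long new = old + 4;		/* Wraps around to 2. */

	/* Plain "<" claims the newer value is smaller after the wrap. */
	printf("plain <:      %d\n", old < new);		/* Prints 0: wrong. */
	/* ULONG_CMP_LT() still orders the two values correctly. */
	printf("ULONG_CMP_LT: %d\n", ULONG_CMP_LT(old, new));	/* Prints 1: right. */
	/* ULONG_CMP_GE() is true for equal values, which is exactly the
	 * needless-store case that the last patch above eliminates by
	 * switching __note_gp_changes() to ULONG_CMP_LT(). */
	printf("ULONG_CMP_GE: %d\n", ULONG_CMP_GE(old, old));	/* Prints 1. */
	return 0;
}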