rcu/nocb: Add bypass callback queueing

Use of the rcu_data structure's segmented ->cblist for no-CBs CPUs takes advantage of unrelated grace periods, thus reducing the memory footprint in the face of floods of call_rcu() invocations. However, the ->cblist field is a more-complex rcu_segcblist structure which must be protected via locking. Even though there are only three entities which can acquire this lock (the CPU invoking call_rcu(), the no-CBs grace-period kthread, and the no-CBs callbacks kthread), the contention on this lock is excessive under heavy stress. This commit therefore greatly reduces contention by provisioning an rcu_cblist structure field named ->nocb_bypass within the rcu_data structure. Each no-CBs CPU is permitted only a limited number of enqueues onto the ->cblist per jiffy, controlled by a new nocb_nobypass_lim_per_jiffy kernel boot parameter that defaults to about 16 enqueues per millisecond (16 * 1000 / HZ). When that limit is exceeded, the CPU instead enqueues onto the new ->nocb_bypass. The ->nocb_bypass is flushed into the ->cblist every jiffy or when the number of callbacks on ->nocb_bypass exceeds qhimark, whichever happens first. During call_rcu() floods, this flushing is carried out by the CPU during the course of its call_rcu() invocations. However, a CPU could simply stop invoking call_rcu() at any time. The no-CBs grace-period kthread therefore carries out less-aggressive flushing (every few jiffies or when the number of callbacks on ->nocb_bypass exceeds (2 * qhimark), whichever comes first). This means that the no-CBs grace-period kthread cannot be permitted to do unbounded waits while there are callbacks on ->nocb_bypass. A ->nocb_bypass_timer is used to provide the needed wakeups. [ paulmck: Apply Coverity feedback reported by Colin Ian King. ] Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
author: Paul E. McKenney <paulmck@linux.ibm.com> 2019-07-02 16:03:33 -0700
committer: Paul E. McKenney <paulmck@linux.ibm.com> 2019-08-13 14:37:32 -0700
commit: d1b222c6be1f8bfc77099e034219732ecaeaaf96 (patch)
tree: e9da6ce477c530ed6584bd922484ad992f994cda /kernel/rcu/tree.h
parent: eda669a6a2c517fd6db41d0fe3c95c1b749c60bd (diff)
1 files changed, 18 insertions, 10 deletions
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 2c3e9068671c..e4df86db8137 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -200,18 +200,26 @@ struct rcu_data {
 	atomic_t nocb_lock_contended;	/* Contention experienced. */
 	int nocb_defer_wakeup;		/* Defer wakeup of nocb_kthread. */
 	struct timer_list nocb_timer;	/* Enforce finite deferral. */
+	unsigned long nocb_gp_adv_time;	/* Last call_rcu() CB adv (jiffies). */
+
+	/* The following fields are used by call_rcu, hence own cacheline. */
+	raw_spinlock_t nocb_bypass_lock ____cacheline_internodealigned_in_smp;
+	struct rcu_cblist nocb_bypass;	/* Lock-contention-bypass CB list. */
+	unsigned long nocb_bypass_first; /* Time (jiffies) of first enqueue. */
+	unsigned long nocb_nobypass_last; /* Last ->cblist enqueue (jiffies). */
+	int nocb_nobypass_count;	/* # ->cblist enqueues at ^^^ time. */
 
 	/* The following fields are used by GP kthread, hence own cacheline. */
 	raw_spinlock_t nocb_gp_lock ____cacheline_internodealigned_in_smp;
-	bool nocb_gp_sleep;
-					/* Is the nocb GP thread asleep? */
+	struct timer_list nocb_bypass_timer; /* Force nocb_bypass flush. */
+	bool nocb_gp_sleep;		/* Is the nocb GP thread asleep? */
 	struct swait_queue_head nocb_gp_wq; /* For nocb kthreads to sleep on. */
 	bool nocb_cb_sleep;		/* Is the nocb CB thread asleep? */
 	struct task_struct *nocb_cb_kthread;
 	struct rcu_data *nocb_next_cb_rdp;
 					/* Next rcu_data in wakeup chain. */
 
-	/* The following fields are used by CB kthread, hence new cachline. */
+	/* The following fields are used by CB kthread, hence new cacheline. */
 	struct rcu_data *nocb_gp_rdp ____cacheline_internodealigned_in_smp;
 					/* GP rdp takes GP-end wakeups. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
@@ -419,6 +427,10 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp);
 static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
 static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
 static void rcu_init_one_nocb(struct rcu_node *rnp);
+static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+				  unsigned long j);
+static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+				bool *was_alldone, unsigned long flags);
 static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
 				 unsigned long flags);
 static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
@@ -430,19 +442,15 @@ static void rcu_nocb_lock(struct rcu_data *rdp);
 static void rcu_nocb_unlock(struct rcu_data *rdp);
 static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
 				       unsigned long flags);
+static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp);
 #ifdef CONFIG_RCU_NOCB_CPU
 static void __init rcu_organize_nocb_kthreads(void);
 #define rcu_nocb_lock_irqsave(rdp, flags)				\
 do {									\
-	if (!rcu_segcblist_is_offloaded(&(rdp)->cblist)) {		\
+	if (!rcu_segcblist_is_offloaded(&(rdp)->cblist))		\
 		local_irq_save(flags);					\
-	} else if (!raw_spin_trylock_irqsave(&(rdp)->nocb_lock, (flags))) {\
-		atomic_inc(&(rdp)->nocb_lock_contended);		\
-		smp_mb__after_atomic(); /* atomic_inc() before lock. */	\
+	else								\
 		raw_spin_lock_irqsave(&(rdp)->nocb_lock, (flags));	\
-		smp_mb__before_atomic(); /* atomic_dec() after lock. */	\
-		atomic_dec(&(rdp)->nocb_lock_contended);		\
-	}								\
 } while (0)
 #else /* #ifdef CONFIG_RCU_NOCB_CPU */
 #define rcu_nocb_lock_irqsave(rdp, flags) local_irq_save(flags)
author	Paul E. McKenney <paulmck@linux.ibm.com>	2019-07-02 16:03:33 -0700
committer	Paul E. McKenney <paulmck@linux.ibm.com>	2019-08-13 14:37:32 -0700
commit	d1b222c6be1f8bfc77099e034219732ecaeaaf96 (patch)
tree	e9da6ce477c530ed6584bd922484ad992f994cda /kernel/rcu/tree.h
parent	eda669a6a2c517fd6db41d0fe3c95c1b749c60bd (diff)