Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--  kernel/rcu/tree.c | 364
1 file changed, 135 insertions(+), 229 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2a734692a581..aa7cade1b9f3 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -524,8 +524,6 @@ module_param(rcu_kick_kthreads, bool, 0644);
 static ulong jiffies_till_sched_qs = HZ / 10;
 module_param(jiffies_till_sched_qs, ulong, 0444);
 
-static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
-				  struct rcu_data *rdp);
 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(void);
@@ -711,44 +709,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 }
 
 /*
- * Is there any need for future grace periods?
- * Interrupts must be disabled.  If the caller does not hold the root
- * rnp_node structure's ->lock, the results are advisory only.
- */
-static int rcu_future_needs_gp(struct rcu_state *rsp)
-{
-	struct rcu_node *rnp = rcu_get_root(rsp);
-	int idx = (READ_ONCE(rnp->completed) + 1) & 0x1;
-	int *fp = &rnp->need_future_gp[idx];
-
-	lockdep_assert_irqs_disabled();
-	return READ_ONCE(*fp);
-}
-
-/*
- * Does the current CPU require a not-yet-started grace period?
- * The caller must have disabled interrupts to prevent races with
- * normal callback registry.
- */
-static bool
-cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
-{
-	lockdep_assert_irqs_disabled();
-	if (rcu_gp_in_progress(rsp))
-		return false;  /* No, a grace period is already in progress. */
-	if (rcu_future_needs_gp(rsp))
-		return true;  /* Yes, a no-CBs CPU needs one. */
-	if (!rcu_segcblist_is_enabled(&rdp->cblist))
-		return false;  /* No, this is a no-CBs (or offline) CPU. */
-	if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
-		return true;  /* Yes, CPU has newly registered callbacks. */
-	if (rcu_segcblist_future_gp_needed(&rdp->cblist,
-					   READ_ONCE(rsp->completed)))
-		return true;  /* Yes, CBs for future grace period. */
-	return false; /* No grace period needed. */
-}
-
-/*
  * Enter an RCU extended quiescent state, which can be either the
  * idle loop or adaptive-tickless usermode execution.
  *
@@ -1234,10 +1194,10 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	}
 
 	/*
-	 * Has this CPU encountered a cond_resched_rcu_qs() since the
-	 * beginning of the grace period?  For this to be the case,
-	 * the CPU has to have noticed the current grace period.  This
-	 * might not be the case for nohz_full CPUs looping in the kernel.
+	 * Has this CPU encountered a cond_resched() since the beginning
+	 * of the grace period?  For this to be the case, the CPU has to
+	 * have noticed the current grace period.  This might not be the
+	 * case for nohz_full CPUs looping in the kernel.
 	 */
 	jtsq = jiffies_till_sched_qs;
 	ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
@@ -1642,18 +1602,30 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
 		return rnp->completed + 1;
 
 	/*
+	 * If the current rcu_node structure believes that RCU is
+	 * idle, and if the rcu_state structure does not yet reflect
+	 * the start of a new grace period, then the next grace period
+	 * will suffice.  The memory barrier is needed to accurately
+	 * sample the rsp->gpnum, and pairs with the second lock
+	 * acquisition in rcu_gp_init(), which is augmented with
+	 * smp_mb__after_unlock_lock() for this purpose.
+	 */
+	if (rnp->gpnum == rnp->completed) {
+		smp_mb(); /* See above block comment. */
+		if (READ_ONCE(rsp->gpnum) == rnp->completed)
+			return rnp->completed + 1;
+	}
+
+	/*
 	 * Otherwise, wait for a possible partial grace period and
 	 * then the subsequent full grace period.
 	 */
 	return rnp->completed + 2;
 }
 
-/*
- * Trace-event helper function for rcu_start_future_gp() and
- * rcu_nocb_wait_gp().
- */
-static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
-				unsigned long c, const char *s)
+/* Trace-event wrapper function for trace_rcu_future_grace_period.  */
+static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+			      unsigned long c, const char *s)
 {
 	trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
 				      rnp->completed, c, rnp->level,
@@ -1661,96 +1633,67 @@ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 }
 
 /*
- * Start some future grace period, as needed to handle newly arrived
+ * Start the specified grace period, as needed to handle newly arrived
  * callbacks.  The required future grace periods are recorded in each
- * rcu_node structure's ->need_future_gp field.  Returns true if there
+ * rcu_node structure's ->need_future_gp[] field.  Returns true if there
  * is reason to awaken the grace-period kthread.
  *
- * The caller must hold the specified rcu_node structure's ->lock.
+ * The caller must hold the specified rcu_node structure's ->lock, which
+ * is why the caller is responsible for waking the grace-period kthread.
  */
-static bool __maybe_unused
-rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
-		    unsigned long *c_out)
+static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+			      unsigned long c)
 {
-	unsigned long c;
 	bool ret = false;
-	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
-
-	raw_lockdep_assert_held_rcu_node(rnp);
-
-	/*
-	 * Pick up grace-period number for new callbacks.  If this
-	 * grace period is already marked as needed, return to the caller.
-	 */
-	c = rcu_cbs_completed(rdp->rsp, rnp);
-	trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
-	if (rnp->need_future_gp[c & 0x1]) {
-		trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
-		goto out;
-	}
+	struct rcu_state *rsp = rdp->rsp;
+	struct rcu_node *rnp_root;
 
 	/*
-	 * If either this rcu_node structure or the root rcu_node structure
-	 * believe that a grace period is in progress, then we must wait
-	 * for the one following, which is in "c".  Because our request
-	 * will be noticed at the end of the current grace period, we don't
-	 * need to explicitly start one.  We only do the lockless check
-	 * of rnp_root's fields if the current rcu_node structure thinks
-	 * there is no grace period in flight, and because we hold rnp->lock,
-	 * the only possible change is when rnp_root's two fields are
-	 * equal, in which case rnp_root->gpnum might be concurrently
-	 * incremented.  But that is OK, as it will just result in our
-	 * doing some extra useless work.
+	 * Use funnel locking to either acquire the root rcu_node
+	 * structure's lock or bail out if the need for this grace period
+	 * has already been recorded -- or has already started.  If there
+	 * is already a grace period in progress in a non-leaf node, no
+	 * recording is needed because the end of the grace period will
+	 * scan the leaf rcu_node structures.  Note that rnp->lock must
+	 * not be released.
 	 */
-	if (rnp->gpnum != rnp->completed ||
-	    READ_ONCE(rnp_root->gpnum) != READ_ONCE(rnp_root->completed)) {
-		rnp->need_future_gp[c & 0x1]++;
-		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
-		goto out;
+	raw_lockdep_assert_held_rcu_node(rnp);
+	trace_rcu_this_gp(rnp, rdp, c, TPS("Startleaf"));
+	for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) {
+		if (rnp_root != rnp)
+			raw_spin_lock_rcu_node(rnp_root);
+		WARN_ON_ONCE(ULONG_CMP_LT(rnp_root->gpnum +
+					  need_future_gp_mask(), c));
+		if (need_future_gp_element(rnp_root, c) ||
+		    ULONG_CMP_GE(rnp_root->gpnum, c) ||
+		    (rnp != rnp_root &&
+		     rnp_root->gpnum != rnp_root->completed)) {
+			trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted"));
+			goto unlock_out;
+		}
+		need_future_gp_element(rnp_root, c) = true;
+		if (rnp_root != rnp && rnp_root->parent != NULL)
+			raw_spin_unlock_rcu_node(rnp_root);
+		if (!rnp_root->parent)
+			break;  /* At root, and perhaps also leaf. */
 	}
 
-	/*
-	 * There might be no grace period in progress.  If we don't already
-	 * hold it, acquire the root rcu_node structure's lock in order to
-	 * start one (if needed).
-	 */
-	if (rnp != rnp_root)
-		raw_spin_lock_rcu_node(rnp_root);
-
-	/*
-	 * Get a new grace-period number.  If there really is no grace
-	 * period in progress, it will be smaller than the one we obtained
-	 * earlier.  Adjust callbacks as needed.
-	 */
-	c = rcu_cbs_completed(rdp->rsp, rnp_root);
-	if (!rcu_is_nocb_cpu(rdp->cpu))
-		(void)rcu_segcblist_accelerate(&rdp->cblist, c);
-
-	/*
-	 * If the needed for the required grace period is already
-	 * recorded, trace and leave.
-	 */
-	if (rnp_root->need_future_gp[c & 0x1]) {
-		trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
+	/* If GP already in progress, just leave, otherwise start one. */
+	if (rnp_root->gpnum != rnp_root->completed) {
+		trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedleafroot"));
 		goto unlock_out;
 	}
-
-	/* Record the need for the future grace period. */
-	rnp_root->need_future_gp[c & 0x1]++;
-
-	/* If a grace period is not already in progress, start one. */
-	if (rnp_root->gpnum != rnp_root->completed) {
-		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
-	} else {
-		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
-		ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
+	trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedroot"));
+	WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT);
+	if (!rsp->gp_kthread) {
+		trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread"));
+		goto unlock_out;
 	}
+	trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq"));
+	ret = true;  /* Caller must wake GP kthread. */
 unlock_out:
 	if (rnp != rnp_root)
 		raw_spin_unlock_rcu_node(rnp_root);
-out:
-	if (c_out != NULL)
-		*c_out = c;
 	return ret;
 }
 
@@ -1758,16 +1701,16 @@ out:
  * Clean up any old requests for the just-ended grace period.  Also return
  * whether any additional grace periods have been requested.
  */
-static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
 {
-	int c = rnp->completed;
-	int needmore;
+	unsigned long c = rnp->completed;
+	bool needmore;
 	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 
-	rnp->need_future_gp[c & 0x1] = 0;
-	needmore = rnp->need_future_gp[(c + 1) & 0x1];
-	trace_rcu_future_gp(rnp, rdp, c,
-			    needmore ? TPS("CleanupMore") : TPS("Cleanup"));
+	need_future_gp_element(rnp, c) = false;
+	needmore = need_any_future_gp(rnp);
+	trace_rcu_this_gp(rnp, rdp, c,
+			  needmore ? TPS("CleanupMore") : TPS("Cleanup"));
 	return needmore;
 }
 
@@ -1802,6 +1745,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
 static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 			       struct rcu_data *rdp)
 {
+	unsigned long c;
 	bool ret = false;
 
 	raw_lockdep_assert_held_rcu_node(rnp);
@@ -1820,8 +1764,9 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 	 * accelerating callback invocation to an earlier grace-period
 	 * number.
 	 */
-	if (rcu_segcblist_accelerate(&rdp->cblist, rcu_cbs_completed(rsp, rnp)))
-		ret = rcu_start_future_gp(rnp, rdp, NULL);
+	c = rcu_cbs_completed(rsp, rnp);
+	if (rcu_segcblist_accelerate(&rdp->cblist, c))
+		ret = rcu_start_this_gp(rnp, rdp, c);
 
 	/* Trace depending on how much we were able to accelerate. */
 	if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
@@ -2049,7 +1994,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
 					    rnp->level, rnp->grplo,
 					    rnp->grphi, rnp->qsmask);
 		raw_spin_unlock_irq_rcu_node(rnp);
-		cond_resched_rcu_qs();
+		cond_resched_tasks_rcu_qs();
 		WRITE_ONCE(rsp->gp_activity, jiffies);
 	}
 
@@ -2108,7 +2053,6 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 {
 	unsigned long gp_duration;
 	bool needgp = false;
-	int nocb = 0;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 	struct swait_queue_head *sq;
@@ -2147,31 +2091,35 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 		if (rnp == rdp->mynode)
 			needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
 		/* smp_mb() provided by prior unlock-lock pair. */
-		nocb += rcu_future_gp_cleanup(rsp, rnp);
+		needgp = rcu_future_gp_cleanup(rsp, rnp) || needgp;
 		sq = rcu_nocb_gp_get(rnp);
 		raw_spin_unlock_irq_rcu_node(rnp);
 		rcu_nocb_gp_cleanup(sq);
-		cond_resched_rcu_qs();
+		cond_resched_tasks_rcu_qs();
 		WRITE_ONCE(rsp->gp_activity, jiffies);
 		rcu_gp_slow(rsp, gp_cleanup_delay);
 	}
 	rnp = rcu_get_root(rsp);
 	raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */
-	rcu_nocb_gp_set(rnp, nocb);
 
 	/* Declare grace period done. */
 	WRITE_ONCE(rsp->completed, rsp->gpnum);
 	trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
 	rsp->gp_state = RCU_GP_IDLE;
+	/* Check for GP requests since above loop. */
 	rdp = this_cpu_ptr(rsp->rda);
+	if (need_any_future_gp(rnp)) {
+		trace_rcu_this_gp(rnp, rdp, rsp->completed - 1,
+				  TPS("CleanupMore"));
+		needgp = true;
+	}
 	/* Advance CBs to reduce false positives below. */
-	needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp;
-	if (needgp || cpu_needs_another_gp(rsp, rdp)) {
+	if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) {
 		WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
-		trace_rcu_grace_period(rsp->name,
-				       READ_ONCE(rsp->gpnum),
+		trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
 				       TPS("newreq"));
 	}
+	WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT);
 	raw_spin_unlock_irq_rcu_node(rnp);
 }
 
@@ -2202,7 +2150,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 			/* Locking provides needed memory barrier. */
 			if (rcu_gp_init(rsp))
 				break;
-			cond_resched_rcu_qs();
+			cond_resched_tasks_rcu_qs();
 			WRITE_ONCE(rsp->gp_activity, jiffies);
 			WARN_ON(signal_pending(current));
 			trace_rcu_grace_period(rsp->name,
@@ -2247,7 +2195,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 				trace_rcu_grace_period(rsp->name,
 						       READ_ONCE(rsp->gpnum),
 						       TPS("fqsend"));
-				cond_resched_rcu_qs();
+				cond_resched_tasks_rcu_qs();
 				WRITE_ONCE(rsp->gp_activity, jiffies);
 				ret = 0; /* Force full wait till next FQS. */
 				j = jiffies_till_next_fqs;
@@ -2260,7 +2208,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 				}
 			} else {
 				/* Deal with stray signal. */
-				cond_resched_rcu_qs();
+				cond_resched_tasks_rcu_qs();
 				WRITE_ONCE(rsp->gp_activity, jiffies);
 				WARN_ON(signal_pending(current));
 				trace_rcu_grace_period(rsp->name,
@@ -2283,71 +2231,6 @@ static int __noreturn rcu_gp_kthread(void *arg)
 }
 
 /*
- * Start a new RCU grace period if warranted, re-initializing the hierarchy
- * in preparation for detecting the next grace period.  The caller must hold
- * the root node's ->lock and hard irqs must be disabled.
- *
- * Note that it is legal for a dying CPU (which is marked as offline) to
- * invoke this function.  This can happen when the dying CPU reports its
- * quiescent state.
- *
- * Returns true if the grace-period kthread must be awakened.
- */
-static bool
-rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
-		      struct rcu_data *rdp)
-{
-	raw_lockdep_assert_held_rcu_node(rnp);
-	if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
-		/*
-		 * Either we have not yet spawned the grace-period
-		 * task, this CPU does not need another grace period,
-		 * or a grace period is already in progress.
-		 * Either way, don't start a new grace period.
-		 */
-		return false;
-	}
-	WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
-	trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
-			       TPS("newreq"));
-
-	/*
-	 * We can't do wakeups while holding the rnp->lock, as that
-	 * could cause possible deadlocks with the rq->lock. Defer
-	 * the wakeup to our caller.
-	 */
-	return true;
-}
-
-/*
- * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
- * callbacks.  Note that rcu_start_gp_advanced() cannot do this because it
- * is invoked indirectly from rcu_advance_cbs(), which would result in
- * endless recursion -- or would do so if it wasn't for the self-deadlock
- * that is encountered beforehand.
- *
- * Returns true if the grace-period kthread needs to be awakened.
- */
-static bool rcu_start_gp(struct rcu_state *rsp)
-{
-	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
-	struct rcu_node *rnp = rcu_get_root(rsp);
-	bool ret = false;
-
-	/*
-	 * If there is no grace period in progress right now, any
-	 * callbacks we have up to this point will be satisfied by the
-	 * next grace period.  Also, advancing the callbacks reduces the
-	 * probability of false positives from cpu_needs_another_gp()
-	 * resulting in pointless grace periods.  So, advance callbacks
-	 * then start the grace period!
-	 */
-	ret = rcu_advance_cbs(rsp, rnp, rdp) || ret;
-	ret = rcu_start_gp_advanced(rsp, rnp, rdp) || ret;
-	return ret;
-}
-
-/*
  * Report a full set of quiescent states to the specified rcu_state data
  * structure.  Invoke rcu_gp_kthread_wake() to awaken the grace-period
  * kthread if another grace period is required.  Whether we wake
@@ -2398,7 +2281,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
 			return;
 		}
 		WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
-		WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 &&
+		WARN_ON_ONCE(!rcu_is_leaf_node(rnp) &&
 			     rcu_preempt_blocked_readers_cgp(rnp));
 		rnp->qsmask &= ~mask;
 		trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
@@ -2782,7 +2665,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
 	struct rcu_node *rnp;
 
 	rcu_for_each_leaf_node(rsp, rnp) {
-		cond_resched_rcu_qs();
+		cond_resched_tasks_rcu_qs();
 		mask = 0;
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		if (rnp->qsmask == 0) {
@@ -2874,22 +2757,27 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 	unsigned long flags;
 	bool needwake;
 	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
+	struct rcu_node *rnp;
 
 	WARN_ON_ONCE(!rdp->beenonline);
 
 	/* Update RCU state based on any recent quiescent states. */
 	rcu_check_quiescent_state(rsp, rdp);
 
-	/* Does this CPU require a not-yet-started grace period? */
-	local_irq_save(flags);
-	if (cpu_needs_another_gp(rsp, rdp)) {
-		raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */
-		needwake = rcu_start_gp(rsp);
-		raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
-		if (needwake)
-			rcu_gp_kthread_wake(rsp);
-	} else {
-		local_irq_restore(flags);
+	/* No grace period and unregistered callbacks? */
+	if (!rcu_gp_in_progress(rsp) &&
+	    rcu_segcblist_is_enabled(&rdp->cblist)) {
+		local_irq_save(flags);
+		if (rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) {
+			local_irq_restore(flags);
+		} else {
+			rnp = rdp->mynode;
+			raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
+			needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+			if (needwake)
+				rcu_gp_kthread_wake(rsp);
+		}
 	}
 
 	/* If there are callbacks ready, invoke them. */
@@ -2973,11 +2861,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 
 		/* Start a new grace period if one not already started. */
 		if (!rcu_gp_in_progress(rsp)) {
-			struct rcu_node *rnp_root = rcu_get_root(rsp);
+			struct rcu_node *rnp = rdp->mynode;
 
-			raw_spin_lock_rcu_node(rnp_root);
-			needwake = rcu_start_gp(rsp);
-			raw_spin_unlock_rcu_node(rnp_root);
+			raw_spin_lock_rcu_node(rnp);
+			needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
+			raw_spin_unlock_rcu_node(rnp);
 			if (needwake)
 				rcu_gp_kthread_wake(rsp);
 		} else {
@@ -3368,7 +3256,9 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 		return 1;
 
 	/* Has RCU gone idle with this CPU needing another grace period? */
-	if (cpu_needs_another_gp(rsp, rdp))
+	if (!rcu_gp_in_progress(rsp) &&
+	    rcu_segcblist_is_enabled(&rdp->cblist) &&
+	    !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
 		return 1;
 
 	/* Has another RCU grace period completed?  */
@@ -3775,6 +3665,8 @@ int rcutree_dead_cpu(unsigned int cpu)
 	return 0;
 }
 
+static DEFINE_PER_CPU(int, rcu_cpu_started);
+
 /*
  * Mark the specified CPU as being online so that subsequent grace periods
  * (both expedited and normal) will wait on it.  Note that this means that
@@ -3796,6 +3688,11 @@ void rcu_cpu_starting(unsigned int cpu)
 	struct rcu_node *rnp;
 	struct rcu_state *rsp;
 
+	if (per_cpu(rcu_cpu_started, cpu))
+		return;
+
+	per_cpu(rcu_cpu_started, cpu) = 1;
+
 	for_each_rcu_flavor(rsp) {
 		rdp = per_cpu_ptr(rsp->rda, cpu);
 		rnp = rdp->mynode;
@@ -3852,6 +3749,8 @@ void rcu_report_dead(unsigned int cpu)
 	preempt_enable();
 	for_each_rcu_flavor(rsp)
 		rcu_cleanup_dying_idle_cpu(cpu, rsp);
+
+	per_cpu(rcu_cpu_started, cpu) = 0;
 }
 
 /* Migrate the dead CPU's callbacks to the current CPU. */
@@ -3861,6 +3760,7 @@ static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
 	struct rcu_data *my_rdp;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+	bool needwake;
 
 	if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
 		return;  /* No callbacks to migrate. */
@@ -3872,12 +3772,15 @@ static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
 		return;
 	}
 	raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
-	rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */
-	rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */
+	/* Leverage recent GPs and set GP for new callbacks. */
+	needwake = rcu_advance_cbs(rsp, rnp_root, rdp) ||
+		   rcu_advance_cbs(rsp, rnp_root, my_rdp);
 	rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
 	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
 		     !rcu_segcblist_n_cbs(&my_rdp->cblist));
 	raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);
+	if (needwake)
+		rcu_gp_kthread_wake(rsp);
 	WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
 		  !rcu_segcblist_empty(&rdp->cblist),
 		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
@@ -4056,7 +3959,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 
 	init_swait_queue_head(&rsp->gp_wq);
 	init_swait_queue_head(&rsp->expedited_wq);
-	rnp = rsp->level[rcu_num_lvls - 1];
+	rnp = rcu_first_leaf_node(rsp);
 	for_each_possible_cpu(i) {
 		while (i > rnp->grphi)
 			rnp++;
@@ -4168,6 +4071,7 @@ static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
 }
 
 struct workqueue_struct *rcu_gp_wq;
+struct workqueue_struct *rcu_par_gp_wq;
 
 void __init rcu_init(void)
 {
@@ -4199,6 +4103,8 @@ void __init rcu_init(void)
 	/* Create workqueue for expedited GPs and for Tree SRCU. */
 	rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
 	WARN_ON(!rcu_gp_wq);
+	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
+	WARN_ON(!rcu_par_gp_wq);
 }
 
 #include "tree_exp.h"
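The heart of this change is the funnel locking in the new rcu_start_this_gp(): a request for grace period "c" starts at a leaf rcu_node structure and walks toward the root, taking each node's ->lock in turn and bailing out as soon as the request is found to be already recorded or already satisfied, so most requests never touch the root lock. The sketch below illustrates that pattern in ordinary user-space C; struct demo_node, its fields, and demo_start_this_gp() are invented for illustration and are not the kernel's rcu_node, locks, or helpers.

#include <pthread.h>
#include <stdbool.h>

/* Hypothetical miniature of an rcu_node; names and fields are invented. */
struct demo_node {
	pthread_mutex_t lock;
	struct demo_node *parent;	/* NULL at the root. */
	unsigned long gpnum;		/* Latest grace period this node has seen start. */
	unsigned long completed;	/* Latest grace period this node has seen end. */
	bool need_gp[4];		/* Outstanding requests, indexed by GP number. */
};

/*
 * Record a request for grace period "c", starting at leaf "rnp" whose lock
 * the caller already holds.  Walk toward the root, acquiring each ancestor's
 * lock in turn, and stop early if the request has already been recorded or
 * the node has already reached "c".  Returns true if the caller should wake
 * whatever thread actually starts grace periods.
 */
static bool demo_start_this_gp(struct demo_node *rnp, unsigned long c)
{
	struct demo_node *node;
	bool ret = false;

	for (node = rnp; ; node = node->parent) {
		if (node != rnp)
			pthread_mutex_lock(&node->lock);	/* Funnel step. */
		if (node->need_gp[c % 4] || node->gpnum >= c ||
		    (node != rnp && node->gpnum != node->completed)) {
			/* Already requested, already started, or will be noticed. */
			goto unlock_out;
		}
		node->need_gp[c % 4] = true;			/* Record the request. */
		if (node != rnp && node->parent)
			pthread_mutex_unlock(&node->lock);	/* Drop interior locks. */
		if (!node->parent)
			break;					/* Reached the root. */
	}
	ret = true;	/* Newly recorded at the root: wake the grace-period starter. */
unlock_out:
	if (node != rnp)
		pthread_mutex_unlock(&node->lock);
	return ret;
}

Compared with the old rcu_start_future_gp(), which peeked at the root fields and then took the root rcu_node lock whenever a grace period had to be recorded there, this walk lets a request terminate at or near the leaf whenever an earlier request or an in-progress grace period already covers it.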
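The diff also replaces the open-coded rnp->need_future_gp[c & 0x1] accesses with need_future_gp_mask(), need_future_gp_element(), and need_any_future_gp(), which it uses but does not define; they presumably live alongside the rcu_node definition outside tree.c. As a rough, stand-alone illustration of what helpers of that shape amount to (names and the slot count here are assumptions, not the kernel's), consider a small per-node array of request flags indexed by the low-order bits of the grace-period number:

#include <stdbool.h>

#define DEMO_NEED_GP_SLOTS 4	/* Must be a power of two; the real size is an assumption. */

struct demo_gp_node {
	bool need_future_gp[DEMO_NEED_GP_SLOTS];
};

/* Lvalue access to the slot for grace period "c", matching the diff's assignments. */
#define demo_need_future_gp_element(rnp, c) \
	((rnp)->need_future_gp[(c) & (DEMO_NEED_GP_SLOTS - 1)])

/* Mask covering the range of grace periods the array can track. */
#define demo_need_future_gp_mask() (DEMO_NEED_GP_SLOTS - 1)

/* Is any grace period still requested on this node? */
static inline bool demo_need_any_future_gp(struct demo_gp_node *rnp)
{
	int i;

	for (i = 0; i < DEMO_NEED_GP_SLOTS; i++)
		if (rnp->need_future_gp[i])
			return true;
	return false;
}

With helpers of this shape, the rewritten rcu_future_gp_cleanup() can clear the slot for the just-ended grace period and then ask whether any other slot is still set, which is how it now decides whether to report that further grace periods are needed.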
