31 files changed, 631 insertions, 629 deletions
| diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index a27fbfb0efb8..65eb856526b7 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -1,3 +1,5 @@ +What is RCU?  --  "Read, Copy, Update" +  Please note that the "What is RCU?" LWN series is an excellent place  to start learning about RCU: diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 7468de429087..3ea0047beb40 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -46,6 +46,7 @@  #include <linux/pci.h>  #include <linux/smp.h>  #include <linux/syscore_ops.h> +#include <linux/rcupdate.h>  #include <asm/cpufeature.h>  #include <asm/e820/api.h> @@ -793,6 +794,9 @@ void mtrr_ap_init(void)  	if (!use_intel() || mtrr_aps_delayed_init)  		return; + +	rcu_cpu_starting(smp_processor_id()); +  	/*  	 * Ideally we should hold mtrr_mutex here to avoid mtrr entries  	 * changed, but this routine will be called in cpu boot time, diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 04a20da76786..c8b30067b6ae 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -357,7 +357,7 @@ static void nvme_free_ns_head(struct kref *ref)  	nvme_mpath_remove_disk(head);  	ida_simple_remove(&head->subsys->ns_ida, head->instance);  	list_del_init(&head->entry); -	cleanup_srcu_struct(&head->srcu); +	cleanup_srcu_struct_quiesced(&head->srcu);  	nvme_put_subsystem(head->subsys);  	kfree(head);  } diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 36360d07f25b..e679b175b411 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -108,7 +108,6 @@ void rcu_sched_qs(void);  void rcu_bh_qs(void);  void rcu_check_callbacks(int user);  void rcu_report_dead(unsigned int cpu); -void rcu_cpu_starting(unsigned int cpu);  void rcutree_migrate_callbacks(int cpu);  #ifdef CONFIG_RCU_STALL_COMMON @@ -188,13 +187,13 @@ static inline void exit_tasks_rcu_finish(void) { }  #endif /* #else #ifdef CONFIG_TASKS_RCU */  /** - * cond_resched_rcu_qs - Report potential quiescent states to RCU + * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU   *   * This macro resembles cond_resched(), except that it is defined to   * report potential quiescent states to RCU-tasks even if the cond_resched()   * machinery were to be shut off, as some advocate for PREEMPT kernels.   
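As a usage sketch for the renamed cond_resched_tasks_rcu_qs() helper documented just above (and defined immediately below): a minimal, hypothetical kthread loop that reports RCU-tasks quiescent states on each pass. This is illustrative only and not part of the patch; the thread function and its workload are invented.

#include <linux/kthread.h>
#include <linux/rcupdate.h>

static int example_scan_thread(void *unused)
{
	while (!kthread_should_stop()) {
		/* ... do one chunk of work ... */
		cond_resched_tasks_rcu_qs(); /* reschedule and report a quiescent state to RCU-tasks */
	}
	return 0;
}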
*/ -#define cond_resched_rcu_qs() \ +#define cond_resched_tasks_rcu_qs() \  do { \  	if (!cond_resched()) \  		rcu_note_voluntary_context_switch_lite(current); \ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index ce9beec35e34..7b3c82e8a625 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -132,5 +132,6 @@ static inline void rcu_all_qs(void) { barrier(); }  #define rcutree_offline_cpu      NULL  #define rcutree_dead_cpu         NULL  #define rcutree_dying_cpu        NULL +static inline void rcu_cpu_starting(unsigned int cpu) { }  #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index fd996cdf1833..914655848ef6 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -74,6 +74,7 @@ static inline void synchronize_rcu_bh_expedited(void)  void rcu_barrier(void);  void rcu_barrier_bh(void);  void rcu_barrier_sched(void); +bool rcu_eqs_special_set(int cpu);  unsigned long get_state_synchronize_rcu(void);  void cond_synchronize_rcu(unsigned long oldstate);  unsigned long get_state_synchronize_sched(void); @@ -100,5 +101,6 @@ int rcutree_online_cpu(unsigned int cpu);  int rcutree_offline_cpu(unsigned int cpu);  int rcutree_dead_cpu(unsigned int cpu);  int rcutree_dying_cpu(unsigned int cpu); +void rcu_cpu_starting(unsigned int cpu);  #endif /* __LINUX_RCUTREE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index ca3f3eae8980..5a0c10b45273 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1661,7 +1661,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)   * explicit rescheduling in places that are safe. The return   * value indicates whether a reschedule was done in fact.   * cond_resched_lock() will drop the spinlock before scheduling, - * cond_resched_softirq() will enable bhs before scheduling.   */  #ifndef CONFIG_PREEMPT  extern int _cond_resched(void); @@ -1681,13 +1680,6 @@ extern int __cond_resched_lock(spinlock_t *lock);  	__cond_resched_lock(lock);				\  }) -extern int __cond_resched_softirq(void); - -#define cond_resched_softirq() ({					\ -	___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);	\ -	__cond_resched_softirq();					\ -}) -  static inline void cond_resched_rcu(void)  {  #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 33c1c698df09..91494d7e8e41 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -69,11 +69,45 @@ struct srcu_struct { };  void call_srcu(struct srcu_struct *sp, struct rcu_head *head,  		void (*func)(struct rcu_head *head)); -void cleanup_srcu_struct(struct srcu_struct *sp); +void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced);  int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);  void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);  void synchronize_srcu(struct srcu_struct *sp); +/** + * cleanup_srcu_struct - deconstruct a sleep-RCU structure + * @sp: structure to clean up. + * + * Must invoke this after you are finished using a given srcu_struct that + * was initialized via init_srcu_struct(), else you leak memory. + */ +static inline void cleanup_srcu_struct(struct srcu_struct *sp) +{ +	_cleanup_srcu_struct(sp, false); +} + +/** + * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure + * @sp: structure to clean up. 
+ * + * Must invoke this after you are finished using a given srcu_struct that + * was initialized via init_srcu_struct(), else you leak memory.  Also, + * all grace-period processing must have completed. + * + * "Completed" means that the last synchronize_srcu() and + * synchronize_srcu_expedited() calls must have returned before the call + * to cleanup_srcu_struct_quiesced().  It also means that the callback + * from the last call_srcu() must have been invoked before the call to + * cleanup_srcu_struct_quiesced(), but you can use srcu_barrier() to help + * with this last.  Violating these rules will get you a WARN_ON() splat + * (with high probability, anyway), and will also cause the srcu_struct + * to be leaked. + */ +static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp) +{ +	_cleanup_srcu_struct(sp, true); +} +  #ifdef CONFIG_DEBUG_LOCK_ALLOC  /** diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index d8c33298c153..5936aac357ab 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -84,20 +84,21 @@ TRACE_EVENT(rcu_grace_period,  );  /* - * Tracepoint for future grace-period events, including those for no-callbacks - * CPUs.  The caller should pull the data from the rcu_node structure, - * other than rcuname, which comes from the rcu_state structure, and event, - * which is one of the following: + * Tracepoint for future grace-period events.  The caller should pull + * the data from the rcu_node structure, other than rcuname, which comes + * from the rcu_state structure, and event, which is one of the following:   * - * "Startleaf": Request a nocb grace period based on leaf-node data. + * "Startleaf": Request a grace period based on leaf-node data. + * "Prestarted": Someone beat us to the request   * "Startedleaf": Leaf-node start proved sufficient.   * "Startedleafroot": Leaf-node start proved sufficient after checking root.   * "Startedroot": Requested a nocb grace period based on root-node data. + * "NoGPkthread": The RCU grace-period kthread has not yet started.   * "StartWait": Start waiting for the requested grace period.   * "ResumeWait": Resume waiting after signal.   * "EndWait": Complete wait.   * "Cleanup": Clean up rcu_node structure after previous GP. - * "CleanupMore": Clean up, and another no-CB GP is needed. + * "CleanupMore": Clean up, and another GP is needed.   */  TRACE_EVENT(rcu_future_grace_period, diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 7a693e31184a..40cea6735c2d 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -270,6 +270,12 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)  	}  } +/* Returns first leaf rcu_node of the specified RCU flavor. */ +#define rcu_first_leaf_node(rsp) ((rsp)->level[rcu_num_lvls - 1]) + +/* Is this rcu_node a leaf? */ +#define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1) +  /*   * Do a full breadth-first scan of the rcu_node structures for the   * specified rcu_state structure. @@ -284,8 +290,7 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)   * rcu_node tree with but one rcu_node structure, this loop is a no-op.   
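To make the new teardown API concrete, here is a minimal, hypothetical sketch of cleanup_srcu_struct_quiesced() following the rules in the kerneldoc above; it is not part of the patch, and the example_* names are invented. If a caller cannot guarantee that all grace-period processing has finished, the plain cleanup_srcu_struct() path remains the safe default.

#include <linux/srcu.h>

static struct srcu_struct example_srcu;

static int example_setup(void)
{
	return init_srcu_struct(&example_srcu);
}

static void example_teardown(void)
{
	/*
	 * Per the kerneldoc above: the last synchronize_srcu() must have
	 * returned and the callback from the last call_srcu() must have
	 * been invoked; srcu_barrier() helps with the latter.
	 */
	srcu_barrier(&example_srcu);
	cleanup_srcu_struct_quiesced(&example_srcu);
}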
*/  #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ -	for ((rnp) = &(rsp)->node[0]; \ -	     (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++) +	for ((rnp) = &(rsp)->node[0]; !rcu_is_leaf_node(rsp, rnp); (rnp)++)  /*   * Scan the leaves of the rcu_node hierarchy for the specified rcu_state @@ -294,7 +299,7 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)   * It is still a leaf node, even if it is also the root node.   */  #define rcu_for_each_leaf_node(rsp, rnp) \ -	for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ +	for ((rnp) = rcu_first_leaf_node(rsp); \  	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)  /* @@ -486,6 +491,7 @@ void rcu_force_quiescent_state(void);  void rcu_bh_force_quiescent_state(void);  void rcu_sched_force_quiescent_state(void);  extern struct workqueue_struct *rcu_gp_wq; +extern struct workqueue_struct *rcu_par_gp_wq;  #endif /* #else #ifdef CONFIG_TINY_RCU */  #ifdef CONFIG_RCU_NOCB_CPU diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c index 88cba7c2956c..5aff271adf1e 100644 --- a/kernel/rcu/rcu_segcblist.c +++ b/kernel/rcu/rcu_segcblist.c @@ -404,24 +404,6 @@ bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)  }  /* - * Scan the specified rcu_segcblist structure for callbacks that need - * a grace period later than the one specified by "seq".  We don't look - * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't - * have a grace-period sequence number. - */ -bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp, -				    unsigned long seq) -{ -	int i; - -	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) -		if (rsclp->tails[i - 1] != rsclp->tails[i] && -		    ULONG_CMP_LT(seq, rsclp->gp_seq[i])) -			return true; -	return false; -} - -/*   * Merge the source rcu_segcblist structure into the destination   * rcu_segcblist structure, then initialize the source.  Any pending   * callbacks from the source get to start over.  
It is best to diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h index 581c12b63544..948470cef385 100644 --- a/kernel/rcu/rcu_segcblist.h +++ b/kernel/rcu/rcu_segcblist.h @@ -134,7 +134,5 @@ void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,  				   struct rcu_cblist *rclp);  void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq);  bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq); -bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp, -				    unsigned long seq);  void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,  			 struct rcu_segcblist *src_rsclp); diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index 777e7a6a0292..e232846516b3 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -369,7 +369,7 @@ static bool __maybe_unused torturing_tasks(void)   */  static void rcu_perf_wait_shutdown(void)  { -	cond_resched_rcu_qs(); +	cond_resched_tasks_rcu_qs();  	if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters)  		return;  	while (!torture_must_stop()) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 680c96d8c00f..e628fcfd1bde 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -593,7 +593,12 @@ static void srcu_torture_init(void)  static void srcu_torture_cleanup(void)  { -	cleanup_srcu_struct(&srcu_ctld); +	static DEFINE_TORTURE_RANDOM(rand); + +	if (torture_random(&rand) & 0x800) +		cleanup_srcu_struct(&srcu_ctld); +	else +		cleanup_srcu_struct_quiesced(&srcu_ctld);  	srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */  } @@ -1609,6 +1614,9 @@ static enum cpuhp_state rcutor_hp;  static void  rcu_torture_cleanup(void)  { +	int flags = 0; +	unsigned long gpnum = 0; +	unsigned long completed = 0;  	int i;  	rcutorture_record_test_transition(); @@ -1639,6 +1647,11 @@ rcu_torture_cleanup(void)  		fakewriter_tasks = NULL;  	} +	rcutorture_get_gp_data(cur_ops->ttype, &flags, &gpnum, &completed); +	srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp, +				&flags, &gpnum, &completed); +	pr_alert("%s:  End-test grace-period state: g%lu c%lu f%#x\n", +		 cur_ops->name, gpnum, completed, flags);  	torture_stop_kthread(rcu_torture_stats, stats_task);  	torture_stop_kthread(rcu_torture_fqs, fqs_task);  	for (i = 0; i < ncbflooders; i++) diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 76ac5f50b2c7..622792abe41a 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -86,16 +86,19 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);   * Must invoke this after you are finished using a given srcu_struct that   * was initialized via init_srcu_struct(), else you leak memory.   
*/ -void cleanup_srcu_struct(struct srcu_struct *sp) +void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)  {  	WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]); -	flush_work(&sp->srcu_work); +	if (quiesced) +		WARN_ON(work_pending(&sp->srcu_work)); +	else +		flush_work(&sp->srcu_work);  	WARN_ON(sp->srcu_gp_running);  	WARN_ON(sp->srcu_gp_waiting);  	WARN_ON(sp->srcu_cb_head);  	WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail);  } -EXPORT_SYMBOL_GPL(cleanup_srcu_struct); +EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);  /*   * Removes the count for the old reader from the appropriate element of diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index fb560fca9ef4..b4123d7a2cec 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -366,24 +366,28 @@ static unsigned long srcu_get_delay(struct srcu_struct *sp)  	return SRCU_INTERVAL;  } -/** - * cleanup_srcu_struct - deconstruct a sleep-RCU structure - * @sp: structure to clean up. - * - * Must invoke this after you are finished using a given srcu_struct that - * was initialized via init_srcu_struct(), else you leak memory. - */ -void cleanup_srcu_struct(struct srcu_struct *sp) +/* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */ +void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)  {  	int cpu;  	if (WARN_ON(!srcu_get_delay(sp))) -		return; /* Leakage unless caller handles error. */ +		return; /* Just leak it! */  	if (WARN_ON(srcu_readers_active(sp))) -		return; /* Leakage unless caller handles error. */ -	flush_delayed_work(&sp->work); +		return; /* Just leak it! */ +	if (quiesced) { +		if (WARN_ON(delayed_work_pending(&sp->work))) +			return; /* Just leak it! */ +	} else { +		flush_delayed_work(&sp->work); +	}  	for_each_possible_cpu(cpu) -		flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work); +		if (quiesced) { +			if (WARN_ON(delayed_work_pending(&per_cpu_ptr(sp->sda, cpu)->work))) +				return; /* Just leak it! */ +		} else { +			flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work); +		}  	if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||  	    WARN_ON(srcu_readers_active(sp))) {  		pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); @@ -392,7 +396,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)  	free_percpu(sp->sda);  	sp->sda = NULL;  } -EXPORT_SYMBOL_GPL(cleanup_srcu_struct); +EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);  /*   * Counts the new reader in the appropriate per-CPU element of the diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 2a734692a581..aa7cade1b9f3 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -524,8 +524,6 @@ module_param(rcu_kick_kthreads, bool, 0644);  static ulong jiffies_till_sched_qs = HZ / 10;  module_param(jiffies_till_sched_qs, ulong, 0444); -static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, -				  struct rcu_data *rdp);  static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));  static void force_quiescent_state(struct rcu_state *rsp);  static int rcu_pending(void); @@ -711,44 +709,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)  }  /* - * Is there any need for future grace periods? - * Interrupts must be disabled.  If the caller does not hold the root - * rnp_node structure's ->lock, the results are advisory only. 
- */ -static int rcu_future_needs_gp(struct rcu_state *rsp) -{ -	struct rcu_node *rnp = rcu_get_root(rsp); -	int idx = (READ_ONCE(rnp->completed) + 1) & 0x1; -	int *fp = &rnp->need_future_gp[idx]; - -	lockdep_assert_irqs_disabled(); -	return READ_ONCE(*fp); -} - -/* - * Does the current CPU require a not-yet-started grace period? - * The caller must have disabled interrupts to prevent races with - * normal callback registry. - */ -static bool -cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) -{ -	lockdep_assert_irqs_disabled(); -	if (rcu_gp_in_progress(rsp)) -		return false;  /* No, a grace period is already in progress. */ -	if (rcu_future_needs_gp(rsp)) -		return true;  /* Yes, a no-CBs CPU needs one. */ -	if (!rcu_segcblist_is_enabled(&rdp->cblist)) -		return false;  /* No, this is a no-CBs (or offline) CPU. */ -	if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) -		return true;  /* Yes, CPU has newly registered callbacks. */ -	if (rcu_segcblist_future_gp_needed(&rdp->cblist, -					   READ_ONCE(rsp->completed))) -		return true;  /* Yes, CBs for future grace period. */ -	return false; /* No grace period needed. */ -} - -/*   * Enter an RCU extended quiescent state, which can be either the   * idle loop or adaptive-tickless usermode execution.   * @@ -1234,10 +1194,10 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)  	}  	/* -	 * Has this CPU encountered a cond_resched_rcu_qs() since the -	 * beginning of the grace period?  For this to be the case, -	 * the CPU has to have noticed the current grace period.  This -	 * might not be the case for nohz_full CPUs looping in the kernel. +	 * Has this CPU encountered a cond_resched() since the beginning +	 * of the grace period?  For this to be the case, the CPU has to +	 * have noticed the current grace period.  This might not be the +	 * case for nohz_full CPUs looping in the kernel.  	 */  	jtsq = jiffies_till_sched_qs;  	ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu); @@ -1642,18 +1602,30 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,  		return rnp->completed + 1;  	/* +	 * If the current rcu_node structure believes that RCU is +	 * idle, and if the rcu_state structure does not yet reflect +	 * the start of a new grace period, then the next grace period +	 * will suffice.  The memory barrier is needed to accurately +	 * sample the rsp->gpnum, and pairs with the second lock +	 * acquisition in rcu_gp_init(), which is augmented with +	 * smp_mb__after_unlock_lock() for this purpose. +	 */ +	if (rnp->gpnum == rnp->completed) { +		smp_mb(); /* See above block comment. */ +		if (READ_ONCE(rsp->gpnum) == rnp->completed) +			return rnp->completed + 1; +	} + +	/*  	 * Otherwise, wait for a possible partial grace period and  	 * then the subsequent full grace period.  	 */  	return rnp->completed + 2;  } -/* - * Trace-event helper function for rcu_start_future_gp() and - * rcu_nocb_wait_gp(). - */ -static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, -				unsigned long c, const char *s) +/* Trace-event wrapper function for trace_rcu_future_grace_period.  
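A worked example (not part of the patch, numbers invented) for the rcu_cbs_completed() fast path added above:

/*
 * Illustrative numbers only:
 *   rnp->completed == rnp->gpnum == 100 and rsp->gpnum == 100
 *     -> RCU looks idle everywhere, so new callbacks can ride the
 *        next grace period: return 100 + 1 = 101.
 *   rnp->gpnum == 101 (or rsp->gpnum == 101) while rnp->completed == 100
 *     -> a grace period is already in flight, so allow for the partial
 *        grace period plus a full one: return 100 + 2 = 102.
 */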
*/ +static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, +			      unsigned long c, const char *s)  {  	trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,  				      rnp->completed, c, rnp->level, @@ -1661,96 +1633,67 @@ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,  }  /* - * Start some future grace period, as needed to handle newly arrived + * Start the specified grace period, as needed to handle newly arrived   * callbacks.  The required future grace periods are recorded in each - * rcu_node structure's ->need_future_gp field.  Returns true if there + * rcu_node structure's ->need_future_gp[] field.  Returns true if there   * is reason to awaken the grace-period kthread.   * - * The caller must hold the specified rcu_node structure's ->lock. + * The caller must hold the specified rcu_node structure's ->lock, which + * is why the caller is responsible for waking the grace-period kthread.   */ -static bool __maybe_unused -rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, -		    unsigned long *c_out) +static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, +			      unsigned long c)  { -	unsigned long c;  	bool ret = false; -	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); - -	raw_lockdep_assert_held_rcu_node(rnp); - -	/* -	 * Pick up grace-period number for new callbacks.  If this -	 * grace period is already marked as needed, return to the caller. -	 */ -	c = rcu_cbs_completed(rdp->rsp, rnp); -	trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf")); -	if (rnp->need_future_gp[c & 0x1]) { -		trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf")); -		goto out; -	} +	struct rcu_state *rsp = rdp->rsp; +	struct rcu_node *rnp_root;  	/* -	 * If either this rcu_node structure or the root rcu_node structure -	 * believe that a grace period is in progress, then we must wait -	 * for the one following, which is in "c".  Because our request -	 * will be noticed at the end of the current grace period, we don't -	 * need to explicitly start one.  We only do the lockless check -	 * of rnp_root's fields if the current rcu_node structure thinks -	 * there is no grace period in flight, and because we hold rnp->lock, -	 * the only possible change is when rnp_root's two fields are -	 * equal, in which case rnp_root->gpnum might be concurrently -	 * incremented.  But that is OK, as it will just result in our -	 * doing some extra useless work. +	 * Use funnel locking to either acquire the root rcu_node +	 * structure's lock or bail out if the need for this grace period +	 * has already been recorded -- or has already started.  If there +	 * is already a grace period in progress in a non-leaf node, no +	 * recording is needed because the end of the grace period will +	 * scan the leaf rcu_node structures.  Note that rnp->lock must +	 * not be released.  	 
*/ -	if (rnp->gpnum != rnp->completed || -	    READ_ONCE(rnp_root->gpnum) != READ_ONCE(rnp_root->completed)) { -		rnp->need_future_gp[c & 0x1]++; -		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf")); -		goto out; +	raw_lockdep_assert_held_rcu_node(rnp); +	trace_rcu_this_gp(rnp, rdp, c, TPS("Startleaf")); +	for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) { +		if (rnp_root != rnp) +			raw_spin_lock_rcu_node(rnp_root); +		WARN_ON_ONCE(ULONG_CMP_LT(rnp_root->gpnum + +					  need_future_gp_mask(), c)); +		if (need_future_gp_element(rnp_root, c) || +		    ULONG_CMP_GE(rnp_root->gpnum, c) || +		    (rnp != rnp_root && +		     rnp_root->gpnum != rnp_root->completed)) { +			trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted")); +			goto unlock_out; +		} +		need_future_gp_element(rnp_root, c) = true; +		if (rnp_root != rnp && rnp_root->parent != NULL) +			raw_spin_unlock_rcu_node(rnp_root); +		if (!rnp_root->parent) +			break;  /* At root, and perhaps also leaf. */  	} -	/* -	 * There might be no grace period in progress.  If we don't already -	 * hold it, acquire the root rcu_node structure's lock in order to -	 * start one (if needed). -	 */ -	if (rnp != rnp_root) -		raw_spin_lock_rcu_node(rnp_root); - -	/* -	 * Get a new grace-period number.  If there really is no grace -	 * period in progress, it will be smaller than the one we obtained -	 * earlier.  Adjust callbacks as needed. -	 */ -	c = rcu_cbs_completed(rdp->rsp, rnp_root); -	if (!rcu_is_nocb_cpu(rdp->cpu)) -		(void)rcu_segcblist_accelerate(&rdp->cblist, c); - -	/* -	 * If the needed for the required grace period is already -	 * recorded, trace and leave. -	 */ -	if (rnp_root->need_future_gp[c & 0x1]) { -		trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot")); +	/* If GP already in progress, just leave, otherwise start one. */ +	if (rnp_root->gpnum != rnp_root->completed) { +		trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedleafroot"));  		goto unlock_out;  	} - -	/* Record the need for the future grace period. */ -	rnp_root->need_future_gp[c & 0x1]++; - -	/* If a grace period is not already in progress, start one. */ -	if (rnp_root->gpnum != rnp_root->completed) { -		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot")); -	} else { -		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot")); -		ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp); +	trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedroot")); +	WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT); +	if (!rsp->gp_kthread) { +		trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread")); +		goto unlock_out;  	} +	trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq")); +	ret = true;  /* Caller must wake GP kthread. */  unlock_out:  	if (rnp != rnp_root)  		raw_spin_unlock_rcu_node(rnp_root); -out: -	if (c_out != NULL) -		*c_out = c;  	return ret;  } @@ -1758,16 +1701,16 @@ out:   * Clean up any old requests for the just-ended grace period.  Also return   * whether any additional grace periods have been requested.   */ -static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) +static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)  { -	int c = rnp->completed; -	int needmore; +	unsigned long c = rnp->completed; +	bool needmore;  	struct rcu_data *rdp = this_cpu_ptr(rsp->rda); -	rnp->need_future_gp[c & 0x1] = 0; -	needmore = rnp->need_future_gp[(c + 1) & 0x1]; -	trace_rcu_future_gp(rnp, rdp, c, -			    needmore ? 
TPS("CleanupMore") : TPS("Cleanup")); +	need_future_gp_element(rnp, c) = false; +	needmore = need_any_future_gp(rnp); +	trace_rcu_this_gp(rnp, rdp, c, +			  needmore ? TPS("CleanupMore") : TPS("Cleanup"));  	return needmore;  } @@ -1802,6 +1745,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)  static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,  			       struct rcu_data *rdp)  { +	unsigned long c;  	bool ret = false;  	raw_lockdep_assert_held_rcu_node(rnp); @@ -1820,8 +1764,9 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,  	 * accelerating callback invocation to an earlier grace-period  	 * number.  	 */ -	if (rcu_segcblist_accelerate(&rdp->cblist, rcu_cbs_completed(rsp, rnp))) -		ret = rcu_start_future_gp(rnp, rdp, NULL); +	c = rcu_cbs_completed(rsp, rnp); +	if (rcu_segcblist_accelerate(&rdp->cblist, c)) +		ret = rcu_start_this_gp(rnp, rdp, c);  	/* Trace depending on how much we were able to accelerate. */  	if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL)) @@ -2049,7 +1994,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)  					    rnp->level, rnp->grplo,  					    rnp->grphi, rnp->qsmask);  		raw_spin_unlock_irq_rcu_node(rnp); -		cond_resched_rcu_qs(); +		cond_resched_tasks_rcu_qs();  		WRITE_ONCE(rsp->gp_activity, jiffies);  	} @@ -2108,7 +2053,6 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)  {  	unsigned long gp_duration;  	bool needgp = false; -	int nocb = 0;  	struct rcu_data *rdp;  	struct rcu_node *rnp = rcu_get_root(rsp);  	struct swait_queue_head *sq; @@ -2147,31 +2091,35 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)  		if (rnp == rdp->mynode)  			needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;  		/* smp_mb() provided by prior unlock-lock pair. */ -		nocb += rcu_future_gp_cleanup(rsp, rnp); +		needgp = rcu_future_gp_cleanup(rsp, rnp) || needgp;  		sq = rcu_nocb_gp_get(rnp);  		raw_spin_unlock_irq_rcu_node(rnp);  		rcu_nocb_gp_cleanup(sq); -		cond_resched_rcu_qs(); +		cond_resched_tasks_rcu_qs();  		WRITE_ONCE(rsp->gp_activity, jiffies);  		rcu_gp_slow(rsp, gp_cleanup_delay);  	}  	rnp = rcu_get_root(rsp);  	raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */ -	rcu_nocb_gp_set(rnp, nocb);  	/* Declare grace period done. */  	WRITE_ONCE(rsp->completed, rsp->gpnum);  	trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));  	rsp->gp_state = RCU_GP_IDLE; +	/* Check for GP requests since above loop. */  	rdp = this_cpu_ptr(rsp->rda); +	if (need_any_future_gp(rnp)) { +		trace_rcu_this_gp(rnp, rdp, rsp->completed - 1, +				  TPS("CleanupMore")); +		needgp = true; +	}  	/* Advance CBs to reduce false positives below. */ -	needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp; -	if (needgp || cpu_needs_another_gp(rsp, rdp)) { +	if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) {  		WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT); -		trace_rcu_grace_period(rsp->name, -				       READ_ONCE(rsp->gpnum), +		trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),  				       TPS("newreq"));  	} +	WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT);  	raw_spin_unlock_irq_rcu_node(rnp);  } @@ -2202,7 +2150,7 @@ static int __noreturn rcu_gp_kthread(void *arg)  			/* Locking provides needed memory barrier. 
*/  			if (rcu_gp_init(rsp))  				break; -			cond_resched_rcu_qs(); +			cond_resched_tasks_rcu_qs();  			WRITE_ONCE(rsp->gp_activity, jiffies);  			WARN_ON(signal_pending(current));  			trace_rcu_grace_period(rsp->name, @@ -2247,7 +2195,7 @@ static int __noreturn rcu_gp_kthread(void *arg)  				trace_rcu_grace_period(rsp->name,  						       READ_ONCE(rsp->gpnum),  						       TPS("fqsend")); -				cond_resched_rcu_qs(); +				cond_resched_tasks_rcu_qs();  				WRITE_ONCE(rsp->gp_activity, jiffies);  				ret = 0; /* Force full wait till next FQS. */  				j = jiffies_till_next_fqs; @@ -2260,7 +2208,7 @@ static int __noreturn rcu_gp_kthread(void *arg)  				}  			} else {  				/* Deal with stray signal. */ -				cond_resched_rcu_qs(); +				cond_resched_tasks_rcu_qs();  				WRITE_ONCE(rsp->gp_activity, jiffies);  				WARN_ON(signal_pending(current));  				trace_rcu_grace_period(rsp->name, @@ -2283,71 +2231,6 @@ static int __noreturn rcu_gp_kthread(void *arg)  }  /* - * Start a new RCU grace period if warranted, re-initializing the hierarchy - * in preparation for detecting the next grace period.  The caller must hold - * the root node's ->lock and hard irqs must be disabled. - * - * Note that it is legal for a dying CPU (which is marked as offline) to - * invoke this function.  This can happen when the dying CPU reports its - * quiescent state. - * - * Returns true if the grace-period kthread must be awakened. - */ -static bool -rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, -		      struct rcu_data *rdp) -{ -	raw_lockdep_assert_held_rcu_node(rnp); -	if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) { -		/* -		 * Either we have not yet spawned the grace-period -		 * task, this CPU does not need another grace period, -		 * or a grace period is already in progress. -		 * Either way, don't start a new grace period. -		 */ -		return false; -	} -	WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT); -	trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), -			       TPS("newreq")); - -	/* -	 * We can't do wakeups while holding the rnp->lock, as that -	 * could cause possible deadlocks with the rq->lock. Defer -	 * the wakeup to our caller. -	 */ -	return true; -} - -/* - * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's - * callbacks.  Note that rcu_start_gp_advanced() cannot do this because it - * is invoked indirectly from rcu_advance_cbs(), which would result in - * endless recursion -- or would do so if it wasn't for the self-deadlock - * that is encountered beforehand. - * - * Returns true if the grace-period kthread needs to be awakened. - */ -static bool rcu_start_gp(struct rcu_state *rsp) -{ -	struct rcu_data *rdp = this_cpu_ptr(rsp->rda); -	struct rcu_node *rnp = rcu_get_root(rsp); -	bool ret = false; - -	/* -	 * If there is no grace period in progress right now, any -	 * callbacks we have up to this point will be satisfied by the -	 * next grace period.  Also, advancing the callbacks reduces the -	 * probability of false positives from cpu_needs_another_gp() -	 * resulting in pointless grace periods.  So, advance callbacks -	 * then start the grace period! -	 */ -	ret = rcu_advance_cbs(rsp, rnp, rdp) || ret; -	ret = rcu_start_gp_advanced(rsp, rnp, rdp) || ret; -	return ret; -} - -/*   * Report a full set of quiescent states to the specified rcu_state data   * structure.  Invoke rcu_gp_kthread_wake() to awaken the grace-period   * kthread if another grace period is required.  
Whether we wake @@ -2398,7 +2281,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,  			return;  		}  		WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */ -		WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 && +		WARN_ON_ONCE(!rcu_is_leaf_node(rnp) &&  			     rcu_preempt_blocked_readers_cgp(rnp));  		rnp->qsmask &= ~mask;  		trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, @@ -2782,7 +2665,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))  	struct rcu_node *rnp;  	rcu_for_each_leaf_node(rsp, rnp) { -		cond_resched_rcu_qs(); +		cond_resched_tasks_rcu_qs();  		mask = 0;  		raw_spin_lock_irqsave_rcu_node(rnp, flags);  		if (rnp->qsmask == 0) { @@ -2874,22 +2757,27 @@ __rcu_process_callbacks(struct rcu_state *rsp)  	unsigned long flags;  	bool needwake;  	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); +	struct rcu_node *rnp;  	WARN_ON_ONCE(!rdp->beenonline);  	/* Update RCU state based on any recent quiescent states. */  	rcu_check_quiescent_state(rsp, rdp); -	/* Does this CPU require a not-yet-started grace period? */ -	local_irq_save(flags); -	if (cpu_needs_another_gp(rsp, rdp)) { -		raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */ -		needwake = rcu_start_gp(rsp); -		raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags); -		if (needwake) -			rcu_gp_kthread_wake(rsp); -	} else { -		local_irq_restore(flags); +	/* No grace period and unregistered callbacks? */ +	if (!rcu_gp_in_progress(rsp) && +	    rcu_segcblist_is_enabled(&rdp->cblist)) { +		local_irq_save(flags); +		if (rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) { +			local_irq_restore(flags); +		} else { +			rnp = rdp->mynode; +			raw_spin_lock_rcu_node(rnp); /* irqs disabled. */ +			needwake = rcu_accelerate_cbs(rsp, rnp, rdp); +			raw_spin_unlock_irqrestore_rcu_node(rnp, flags); +			if (needwake) +				rcu_gp_kthread_wake(rsp); +		}  	}  	/* If there are callbacks ready, invoke them. */ @@ -2973,11 +2861,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,  		/* Start a new grace period if one not already started. */  		if (!rcu_gp_in_progress(rsp)) { -			struct rcu_node *rnp_root = rcu_get_root(rsp); +			struct rcu_node *rnp = rdp->mynode; -			raw_spin_lock_rcu_node(rnp_root); -			needwake = rcu_start_gp(rsp); -			raw_spin_unlock_rcu_node(rnp_root); +			raw_spin_lock_rcu_node(rnp); +			needwake = rcu_accelerate_cbs(rsp, rnp, rdp); +			raw_spin_unlock_rcu_node(rnp);  			if (needwake)  				rcu_gp_kthread_wake(rsp);  		} else { @@ -3368,7 +3256,9 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)  		return 1;  	/* Has RCU gone idle with this CPU needing another grace period? */ -	if (cpu_needs_another_gp(rsp, rdp)) +	if (!rcu_gp_in_progress(rsp) && +	    rcu_segcblist_is_enabled(&rdp->cblist) && +	    !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))  		return 1;  	/* Has another RCU grace period completed?  */ @@ -3775,6 +3665,8 @@ int rcutree_dead_cpu(unsigned int cpu)  	return 0;  } +static DEFINE_PER_CPU(int, rcu_cpu_started); +  /*   * Mark the specified CPU as being online so that subsequent grace periods   * (both expedited and normal) will wait on it.  
Note that this means that @@ -3796,6 +3688,11 @@ void rcu_cpu_starting(unsigned int cpu)  	struct rcu_node *rnp;  	struct rcu_state *rsp; +	if (per_cpu(rcu_cpu_started, cpu)) +		return; + +	per_cpu(rcu_cpu_started, cpu) = 1; +  	for_each_rcu_flavor(rsp) {  		rdp = per_cpu_ptr(rsp->rda, cpu);  		rnp = rdp->mynode; @@ -3852,6 +3749,8 @@ void rcu_report_dead(unsigned int cpu)  	preempt_enable();  	for_each_rcu_flavor(rsp)  		rcu_cleanup_dying_idle_cpu(cpu, rsp); + +	per_cpu(rcu_cpu_started, cpu) = 0;  }  /* Migrate the dead CPU's callbacks to the current CPU. */ @@ -3861,6 +3760,7 @@ static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)  	struct rcu_data *my_rdp;  	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);  	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); +	bool needwake;  	if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))  		return;  /* No callbacks to migrate. */ @@ -3872,12 +3772,15 @@ static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)  		return;  	}  	raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ -	rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */ -	rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */ +	/* Leverage recent GPs and set GP for new callbacks. */ +	needwake = rcu_advance_cbs(rsp, rnp_root, rdp) || +		   rcu_advance_cbs(rsp, rnp_root, my_rdp);  	rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);  	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=  		     !rcu_segcblist_n_cbs(&my_rdp->cblist));  	raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags); +	if (needwake) +		rcu_gp_kthread_wake(rsp);  	WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||  		  !rcu_segcblist_empty(&rdp->cblist),  		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n", @@ -4056,7 +3959,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)  	init_swait_queue_head(&rsp->gp_wq);  	init_swait_queue_head(&rsp->expedited_wq); -	rnp = rsp->level[rcu_num_lvls - 1]; +	rnp = rcu_first_leaf_node(rsp);  	for_each_possible_cpu(i) {  		while (i > rnp->grphi)  			rnp++; @@ -4168,6 +4071,7 @@ static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)  }  struct workqueue_struct *rcu_gp_wq; +struct workqueue_struct *rcu_par_gp_wq;  void __init rcu_init(void)  { @@ -4199,6 +4103,8 @@ void __init rcu_init(void)  	/* Create workqueue for expedited GPs and for Tree SRCU. */  	rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);  	WARN_ON(!rcu_gp_wq); +	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0); +	WARN_ON(!rcu_par_gp_wq);  }  #include "tree_exp.h" diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index f491ab4f2e8e..78e051dffc5b 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -58,6 +58,14 @@ struct rcu_dynticks {  #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */  }; +/* Communicate arguments to a workqueue handler. */ +struct rcu_exp_work { +	smp_call_func_t rew_func; +	struct rcu_state *rew_rsp; +	unsigned long rew_s; +	struct work_struct rew_work; +}; +  /* RCU's kthread states for tracing. */  #define RCU_KTHREAD_STOPPED  0  #define RCU_KTHREAD_RUNNING  1 @@ -150,15 +158,32 @@ struct rcu_node {  	struct swait_queue_head nocb_gp_wq[2];  				/* Place for rcu_nocb_kthread() to wait GP. */  #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ -	int need_future_gp[2]; -				/* Counts of upcoming no-CB GP requests. */ +	u8 need_future_gp[4];	/* Counts of upcoming GP requests. 
*/  	raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;  	spinlock_t exp_lock ____cacheline_internodealigned_in_smp;  	unsigned long exp_seq_rq;  	wait_queue_head_t exp_wq[4]; +	struct rcu_exp_work rew; +	bool exp_need_flush;	/* Need to flush workitem? */  } ____cacheline_internodealigned_in_smp; +/* Accessors for ->need_future_gp[] array. */ +#define need_future_gp_mask() \ +	(ARRAY_SIZE(((struct rcu_node *)NULL)->need_future_gp) - 1) +#define need_future_gp_element(rnp, c) \ +	((rnp)->need_future_gp[(c) & need_future_gp_mask()]) +#define need_any_future_gp(rnp)						\ +({									\ +	int __i;							\ +	bool __nonzero = false;						\ +									\ +	for (__i = 0; __i < ARRAY_SIZE((rnp)->need_future_gp); __i++)	\ +		__nonzero = __nonzero ||				\ +			    READ_ONCE((rnp)->need_future_gp[__i]);	\ +	__nonzero;							\ +}) +  /*   * Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and   * are indexed relative to this interval rather than the global CPU ID space. @@ -224,10 +249,6 @@ struct rcu_data {  #ifdef CONFIG_RCU_FAST_NO_HZ  	struct rcu_head oom_head;  #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ -	atomic_long_t exp_workdone0;	/* # done by workqueue. */ -	atomic_long_t exp_workdone1;	/* # done by others #1. */ -	atomic_long_t exp_workdone2;	/* # done by others #2. */ -	atomic_long_t exp_workdone3;	/* # done by others #3. */  	int exp_dynticks_snap;		/* Double-check need for IPI. */  	/* 6) Callback offloading. */ @@ -408,7 +429,6 @@ extern struct rcu_state rcu_preempt_state;  #endif /* #ifdef CONFIG_PREEMPT_RCU */  int rcu_dynticks_snap(struct rcu_dynticks *rdtp); -bool rcu_eqs_special_set(int cpu);  #ifdef CONFIG_RCU_BOOST  DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); @@ -438,7 +458,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);  static void invoke_rcu_callbacks_kthread(void);  static bool rcu_is_callbacks_kthread(void);  #ifdef CONFIG_RCU_BOOST -static void rcu_preempt_do_callbacks(void);  static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,  						 struct rcu_node *rnp);  #endif /* #ifdef CONFIG_RCU_BOOST */ @@ -454,7 +473,6 @@ static void print_cpu_stall_info_end(void);  static void zero_cpu_stall_ticks(struct rcu_data *rdp);  static void increment_cpu_stall_ticks(void);  static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu); -static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);  static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);  static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);  static void rcu_init_one_nocb(struct rcu_node *rnp); diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index f72eefab8543..d40708e8c5d6 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -20,6 +20,8 @@   * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>   */ +#include <linux/lockdep.h> +  /*   * Record the start of an expedited grace period.   */ @@ -154,15 +156,35 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)   * for the current expedited grace period.  Works only for preemptible   * RCU -- other RCU implementation use other means.   * - * Caller must hold the rcu_state's exp_mutex. 
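The need_future_gp_element()/need_future_gp_mask() accessors above index a small power-of-two ring by grace-period number. A stand-alone, userspace-style sketch of that indexing (illustrative only; the array and numbers are made up):

#include <stdio.h>

int main(void)
{
	unsigned char need_future_gp[4] = { 0 };	/* mirrors rcu_node's u8 [4] */
	unsigned long mask = sizeof(need_future_gp) / sizeof(need_future_gp[0]) - 1;
	unsigned long c;

	for (c = 100; c < 104; c++) {
		need_future_gp[c & mask] = 1;	/* like need_future_gp_element(rnp, c) = true */
		printf("request for GP %lu recorded in slot %lu\n", c, c & mask);
	}
	return 0;
}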
+ * Caller must hold the specificed rcu_node structure's ->lock   */  static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)  { +	raw_lockdep_assert_held_rcu_node(rnp); +  	return rnp->exp_tasks == NULL &&  	       READ_ONCE(rnp->expmask) == 0;  }  /* + * Like sync_rcu_preempt_exp_done(), but this function assumes the caller + * doesn't hold the rcu_node's ->lock, and will acquire and release the lock + * itself + */ +static bool sync_rcu_preempt_exp_done_unlocked(struct rcu_node *rnp) +{ +	unsigned long flags; +	bool ret; + +	raw_spin_lock_irqsave_rcu_node(rnp, flags); +	ret = sync_rcu_preempt_exp_done(rnp); +	raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + +	return ret; +} + + +/*   * Report the exit from RCU read-side critical section for the last task   * that queued itself during or before the current expedited preemptible-RCU   * grace period.  This event is reported either to the rcu_node structure on @@ -170,8 +192,7 @@ static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)   * recursively up the tree.  (Calm down, calm down, we do the recursion   * iteratively!)   * - * Caller must hold the rcu_state's exp_mutex and the specified rcu_node - * structure's ->lock. + * Caller must hold the specified rcu_node structure's ->lock.   */  static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,  				 bool wake, unsigned long flags) @@ -207,8 +228,6 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,  /*   * Report expedited quiescent state for specified node.  This is a   * lock-acquisition wrapper function for __rcu_report_exp_rnp(). - * - * Caller must hold the rcu_state's exp_mutex.   */  static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,  					      struct rcu_node *rnp, bool wake) @@ -221,8 +240,7 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,  /*   * Report expedited quiescent state for multiple CPUs, all covered by the - * specified leaf rcu_node structure.  Caller must hold the rcu_state's - * exp_mutex. + * specified leaf rcu_node structure.   */  static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,  				    unsigned long mask, bool wake) @@ -248,14 +266,12 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,  }  /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */ -static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat, -			       unsigned long s) +static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s)  {  	if (rcu_exp_gp_seq_done(rsp, s)) {  		trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));  		/* Ensure test happens before caller kfree(). */  		smp_mb__before_atomic(); /* ^^^ */ -		atomic_long_inc(stat);  		return true;  	}  	return false; @@ -289,7 +305,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)  	 * promoting locality and is not strictly needed for correctness.  	 */  	for (; rnp != NULL; rnp = rnp->parent) { -		if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s)) +		if (sync_exp_work_done(rsp, s))  			return true;  		/* Work not done, either wait here or go up. 
*/ @@ -302,8 +318,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)  						  rnp->grplo, rnp->grphi,  						  TPS("wait"));  			wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], -				   sync_exp_work_done(rsp, -						      &rdp->exp_workdone2, s)); +				   sync_exp_work_done(rsp, s));  			return true;  		}  		rnp->exp_seq_rq = s; /* Followers can wait on us. */ @@ -313,7 +328,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)  	}  	mutex_lock(&rsp->exp_mutex);  fastpath: -	if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) { +	if (sync_exp_work_done(rsp, s)) {  		mutex_unlock(&rsp->exp_mutex);  		return true;  	} @@ -362,93 +377,129 @@ static void sync_sched_exp_online_cleanup(int cpu)  }  /* - * Select the nodes that the upcoming expedited grace period needs - * to wait for. + * Select the CPUs within the specified rcu_node that the upcoming + * expedited grace period needs to wait for.   */ -static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, -				     smp_call_func_t func) +static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)  {  	int cpu;  	unsigned long flags; +	smp_call_func_t func;  	unsigned long mask_ofl_test;  	unsigned long mask_ofl_ipi;  	int ret; -	struct rcu_node *rnp; - -	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset")); -	sync_exp_reset_tree(rsp); -	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select")); -	rcu_for_each_leaf_node(rsp, rnp) { -		raw_spin_lock_irqsave_rcu_node(rnp, flags); +	struct rcu_exp_work *rewp = +		container_of(wp, struct rcu_exp_work, rew_work); +	struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew); +	struct rcu_state *rsp = rewp->rew_rsp; -		/* Each pass checks a CPU for identity, offline, and idle. */ -		mask_ofl_test = 0; -		for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { -			unsigned long mask = leaf_node_cpu_bit(rnp, cpu); -			struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); -			struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu); -			int snap; +	func = rewp->rew_func; +	raw_spin_lock_irqsave_rcu_node(rnp, flags); -			if (raw_smp_processor_id() == cpu || -			    !(rnp->qsmaskinitnext & mask)) { +	/* Each pass checks a CPU for identity, offline, and idle. */ +	mask_ofl_test = 0; +	for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { +		unsigned long mask = leaf_node_cpu_bit(rnp, cpu); +		struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); +		struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu); +		int snap; + +		if (raw_smp_processor_id() == cpu || +		    !(rnp->qsmaskinitnext & mask)) { +			mask_ofl_test |= mask; +		} else { +			snap = rcu_dynticks_snap(rdtp); +			if (rcu_dynticks_in_eqs(snap))  				mask_ofl_test |= mask; -			} else { -				snap = rcu_dynticks_snap(rdtp); -				if (rcu_dynticks_in_eqs(snap)) -					mask_ofl_test |= mask; -				else -					rdp->exp_dynticks_snap = snap; -			} +			else +				rdp->exp_dynticks_snap = snap;  		} -		mask_ofl_ipi = rnp->expmask & ~mask_ofl_test; - -		/* -		 * Need to wait for any blocked tasks as well.  Note that -		 * additional blocking tasks will also block the expedited -		 * GP until such time as the ->expmask bits are cleared. -		 */ -		if (rcu_preempt_has_tasks(rnp)) -			rnp->exp_tasks = rnp->blkd_tasks.next; -		raw_spin_unlock_irqrestore_rcu_node(rnp, flags); +	} +	mask_ofl_ipi = rnp->expmask & ~mask_ofl_test; -		/* IPI the remaining CPUs for expedited quiescent state. 
*/ -		for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { -			unsigned long mask = leaf_node_cpu_bit(rnp, cpu); -			struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); +	/* +	 * Need to wait for any blocked tasks as well.	Note that +	 * additional blocking tasks will also block the expedited GP +	 * until such time as the ->expmask bits are cleared. +	 */ +	if (rcu_preempt_has_tasks(rnp)) +		rnp->exp_tasks = rnp->blkd_tasks.next; +	raw_spin_unlock_irqrestore_rcu_node(rnp, flags); -			if (!(mask_ofl_ipi & mask)) -				continue; +	/* IPI the remaining CPUs for expedited quiescent state. */ +	for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { +		unsigned long mask = leaf_node_cpu_bit(rnp, cpu); +		struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); + +		if (!(mask_ofl_ipi & mask)) +			continue;  retry_ipi: -			if (rcu_dynticks_in_eqs_since(rdp->dynticks, -						      rdp->exp_dynticks_snap)) { -				mask_ofl_test |= mask; -				continue; -			} -			ret = smp_call_function_single(cpu, func, rsp, 0); -			if (!ret) { -				mask_ofl_ipi &= ~mask; -				continue; -			} -			/* Failed, raced with CPU hotplug operation. */ -			raw_spin_lock_irqsave_rcu_node(rnp, flags); -			if ((rnp->qsmaskinitnext & mask) && -			    (rnp->expmask & mask)) { -				/* Online, so delay for a bit and try again. */ -				raw_spin_unlock_irqrestore_rcu_node(rnp, flags); -				trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl")); -				schedule_timeout_uninterruptible(1); -				goto retry_ipi; -			} -			/* CPU really is offline, so we can ignore it. */ -			if (!(rnp->expmask & mask)) -				mask_ofl_ipi &= ~mask; +		if (rcu_dynticks_in_eqs_since(rdp->dynticks, +					      rdp->exp_dynticks_snap)) { +			mask_ofl_test |= mask; +			continue; +		} +		ret = smp_call_function_single(cpu, func, rsp, 0); +		if (!ret) { +			mask_ofl_ipi &= ~mask; +			continue; +		} +		/* Failed, raced with CPU hotplug operation. */ +		raw_spin_lock_irqsave_rcu_node(rnp, flags); +		if ((rnp->qsmaskinitnext & mask) && +		    (rnp->expmask & mask)) { +			/* Online, so delay for a bit and try again. */  			raw_spin_unlock_irqrestore_rcu_node(rnp, flags); +			trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl")); +			schedule_timeout_uninterruptible(1); +			goto retry_ipi; +		} +		/* CPU really is offline, so we can ignore it. */ +		if (!(rnp->expmask & mask)) +			mask_ofl_ipi &= ~mask; +		raw_spin_unlock_irqrestore_rcu_node(rnp, flags); +	} +	/* Report quiescent states for those that went offline. */ +	mask_ofl_test |= mask_ofl_ipi; +	if (mask_ofl_test) +		rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false); +} + +/* + * Select the nodes that the upcoming expedited grace period needs + * to wait for. + */ +static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, +				     smp_call_func_t func) +{ +	struct rcu_node *rnp; + +	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset")); +	sync_exp_reset_tree(rsp); +	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select")); + +	/* Schedule work for each leaf rcu_node structure. */ +	rcu_for_each_leaf_node(rsp, rnp) { +		rnp->exp_need_flush = false; +		if (!READ_ONCE(rnp->expmask)) +			continue; /* Avoid early boot non-existent wq. */ +		rnp->rew.rew_func = func; +		rnp->rew.rew_rsp = rsp; +		if (!READ_ONCE(rcu_par_gp_wq) || +		    rcu_scheduler_active != RCU_SCHEDULER_RUNNING) { +			/* No workqueues yet. 
*/ +			sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work); +			continue;  		} -		/* Report quiescent states for those that went offline. */ -		mask_ofl_test |= mask_ofl_ipi; -		if (mask_ofl_test) -			rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false); +		INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus); +		queue_work_on(rnp->grplo, rcu_par_gp_wq, &rnp->rew.rew_work); +		rnp->exp_need_flush = true;  	} + +	/* Wait for workqueue jobs (if any) to complete. */ +	rcu_for_each_leaf_node(rsp, rnp) +		if (rnp->exp_need_flush) +			flush_work(&rnp->rew.rew_work);  }  static void synchronize_sched_expedited_wait(struct rcu_state *rsp) @@ -469,9 +520,9 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)  	for (;;) {  		ret = swait_event_timeout(  				rsp->expedited_wq, -				sync_rcu_preempt_exp_done(rnp_root), +				sync_rcu_preempt_exp_done_unlocked(rnp_root),  				jiffies_stall); -		if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root)) +		if (ret > 0 || sync_rcu_preempt_exp_done_unlocked(rnp_root))  			return;  		WARN_ON(ret < 0);  /* workqueues should not be signaled. */  		if (rcu_cpu_stall_suppress) @@ -504,7 +555,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)  			rcu_for_each_node_breadth_first(rsp, rnp) {  				if (rnp == rnp_root)  					continue; /* printed unconditionally */ -				if (sync_rcu_preempt_exp_done(rnp)) +				if (sync_rcu_preempt_exp_done_unlocked(rnp))  					continue;  				pr_cont(" l=%u:%d-%d:%#lx/%c",  					rnp->level, rnp->grplo, rnp->grphi, @@ -560,14 +611,6 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)  	mutex_unlock(&rsp->exp_wake_mutex);  } -/* Let the workqueue handler know what it is supposed to do. */ -struct rcu_exp_work { -	smp_call_func_t rew_func; -	struct rcu_state *rew_rsp; -	unsigned long rew_s; -	struct work_struct rew_work; -}; -  /*   * Common code to drive an expedited grace period forward, used by   * workqueues and mid-boot-time tasks. @@ -633,7 +676,7 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,  	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());  	rnp = rcu_get_root(rsp);  	wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], -		   sync_exp_work_done(rsp, &rdp->exp_workdone0, s)); +		   sync_exp_work_done(rsp, s));  	smp_mb(); /* Workqueue actions happen before return. */  	/* Let the next expedited grace period start. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 84fbee4686d3..7fd12039e512 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -182,7 +182,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)  	raw_lockdep_assert_held_rcu_node(rnp);  	WARN_ON_ONCE(rdp->mynode != rnp); -	WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1); +	WARN_ON_ONCE(!rcu_is_leaf_node(rnp));  	/*  	 * Decide where to queue the newly blocked task.  In theory, @@ -384,6 +384,50 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)  }  /* + * Preemptible RCU implementation for rcu_read_lock(). + * Just increment ->rcu_read_lock_nesting, shared state will be updated + * if we block. + */ +void __rcu_read_lock(void) +{ +	current->rcu_read_lock_nesting++; +	barrier();  /* critical section after entry code. */ +} +EXPORT_SYMBOL_GPL(__rcu_read_lock); + +/* + * Preemptible RCU implementation for rcu_read_unlock(). + * Decrement ->rcu_read_lock_nesting.  
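The rewritten sync_rcu_exp_select_cpus() above fans the per-leaf scan out onto rcu_par_gp_wq and then flushes each queued item. A generic sketch of that fan-out/flush pattern (illustrative only; struct node_work and these function names are invented, not part of the patch):

#include <linux/kernel.h>
#include <linux/workqueue.h>

struct node_work {
	struct work_struct work;
	int first_cpu;		/* preferred CPU, like rnp->grplo above */
	bool need_flush;	/* like rnp->exp_need_flush above */
};

static void process_one_node(struct work_struct *w)
{
	struct node_work *nw = container_of(w, struct node_work, work);

	/* ... per-node work, analogous to sync_rcu_exp_select_node_cpus() ... */
	(void)nw;
}

static void process_all_nodes(struct workqueue_struct *wq,
			      struct node_work *nodes, int nr_nodes)
{
	int i;

	/* Fan out: one work item per node, queued near that node's CPUs. */
	for (i = 0; i < nr_nodes; i++) {
		INIT_WORK(&nodes[i].work, process_one_node);
		queue_work_on(nodes[i].first_cpu, wq, &nodes[i].work);
		nodes[i].need_flush = true;
	}

	/* Then wait for all of them to finish before proceeding. */
	for (i = 0; i < nr_nodes; i++)
		if (nodes[i].need_flush)
			flush_work(&nodes[i].work);
}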
If the result is zero (outermost + * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then + * invoke rcu_read_unlock_special() to clean up after a context switch + * in an RCU read-side critical section and other special cases. + */ +void __rcu_read_unlock(void) +{ +	struct task_struct *t = current; + +	if (t->rcu_read_lock_nesting != 1) { +		--t->rcu_read_lock_nesting; +	} else { +		barrier();  /* critical section before exit code. */ +		t->rcu_read_lock_nesting = INT_MIN; +		barrier();  /* assign before ->rcu_read_unlock_special load */ +		if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s))) +			rcu_read_unlock_special(t); +		barrier();  /* ->rcu_read_unlock_special load before assign */ +		t->rcu_read_lock_nesting = 0; +	} +#ifdef CONFIG_PROVE_LOCKING +	{ +		int rrln = READ_ONCE(t->rcu_read_lock_nesting); + +		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); +	} +#endif /* #ifdef CONFIG_PROVE_LOCKING */ +} +EXPORT_SYMBOL_GPL(__rcu_read_unlock); + +/*   * Advance a ->blkd_tasks-list pointer to the next entry, instead   * returning NULL if at the end of the list.   */ @@ -489,7 +533,7 @@ void rcu_read_unlock_special(struct task_struct *t)  		rnp = t->rcu_blocked_node;  		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */  		WARN_ON_ONCE(rnp != t->rcu_blocked_node); -		WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1); +		WARN_ON_ONCE(!rcu_is_leaf_node(rnp));  		empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);  		empty_exp = sync_rcu_preempt_exp_done(rnp);  		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ @@ -685,15 +729,6 @@ static void rcu_preempt_check_callbacks(void)  		t->rcu_read_unlock_special.b.need_qs = true;  } -#ifdef CONFIG_RCU_BOOST - -static void rcu_preempt_do_callbacks(void) -{ -	rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p)); -} - -#endif /* #ifdef CONFIG_RCU_BOOST */ -  /**   * call_rcu() - Queue an RCU callback for invocation after a grace period.   * @head: structure to be used for queueing the RCU updates. @@ -1140,7 +1175,7 @@ static void rcu_kthread_do_work(void)  {  	rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));  	rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); -	rcu_preempt_do_callbacks(); +	rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));  }  static void rcu_cpu_kthread_setup(unsigned int cpu) @@ -1607,7 +1642,7 @@ static int rcu_oom_notify(struct notifier_block *self,  	for_each_online_cpu(cpu) {  		smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); -		cond_resched_rcu_qs(); +		cond_resched_tasks_rcu_qs();  	}  	/* Unconditionally decrement: no need to wake ourselves up. */ @@ -1780,19 +1815,6 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)  	swake_up_all(sq);  } -/* - * Set the root rcu_node structure's ->need_future_gp field - * based on the sum of those of all rcu_node structures.  This does - * double-count the root rcu_node structure's requests, but this - * is necessary to handle the possibility of a rcu_nocb_kthread() - * having awakened during the time that the rcu_node structures - * were being updated for the end of the previous grace period. 
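For reference, a minimal (hypothetical) caller of the preemptible-RCU read-side primitives whose nesting the __rcu_read_lock()/__rcu_read_unlock() implementations above track; only the outermost rcu_read_unlock() can end up in rcu_read_unlock_special():

#include <linux/rcupdate.h>

static void example_nested_readers(void)
{
	rcu_read_lock();		/* ->rcu_read_lock_nesting: 0 -> 1 */
	rcu_read_lock();		/* 1 -> 2, nested reader */
	/* ... read-side accesses under RCU protection ... */
	rcu_read_unlock();		/* 2 -> 1, fast path */
	rcu_read_unlock();		/* 1 -> 0, may invoke rcu_read_unlock_special() */
}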
- */ -static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) -{ -	rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq; -} -  static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)  {  	return &rnp->nocb_gp_wq[rnp->completed & 0x1]; @@ -1966,7 +1988,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,  			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,  					    TPS("WakeOvf"));  		} else { -			wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE, +			wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE_FORCE,  					       TPS("WakeOvfIsDeferred"));  		}  		rdp->qlen_last_fqs_check = LONG_MAX / 2; @@ -2048,7 +2070,8 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)  	struct rcu_node *rnp = rdp->mynode;  	raw_spin_lock_irqsave_rcu_node(rnp, flags); -	needwake = rcu_start_future_gp(rnp, rdp, &c); +	c = rcu_cbs_completed(rdp->rsp, rnp); +	needwake = rcu_start_this_gp(rnp, rdp, c);  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  	if (needwake)  		rcu_gp_kthread_wake(rdp->rsp); @@ -2057,7 +2080,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)  	 * Wait for the grace period.  Do so interruptibly to avoid messing  	 * up the load average.  	 */ -	trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait")); +	trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait"));  	for (;;) {  		swait_event_interruptible(  			rnp->nocb_gp_wq[c & 0x1], @@ -2065,9 +2088,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)  		if (likely(d))  			break;  		WARN_ON(signal_pending(current)); -		trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait")); +		trace_rcu_this_gp(rnp, rdp, c, TPS("ResumeWait"));  	} -	trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait")); +	trace_rcu_this_gp(rnp, rdp, c, TPS("EndWait"));  	smp_mb(); /* Ensure that CB invocation happens after GP end. */  } @@ -2236,7 +2259,7 @@ static int rcu_nocb_kthread(void *arg)  				cl++;  			c++;  			local_bh_enable(); -			cond_resched_rcu_qs(); +			cond_resched_tasks_rcu_qs();  			list = next;  		}  		trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); @@ -2292,7 +2315,7 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)  void __init rcu_init_nohz(void)  {  	int cpu; -	bool need_rcu_nocb_mask = true; +	bool need_rcu_nocb_mask = false;  	struct rcu_state *rsp;  #if defined(CONFIG_NO_HZ_FULL) @@ -2315,7 +2338,7 @@ void __init rcu_init_nohz(void)  #endif /* #if defined(CONFIG_NO_HZ_FULL) */  	if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { -		pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n"); +		pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");  		cpumask_and(rcu_nocb_mask, cpu_possible_mask,  			    rcu_nocb_mask);  	} @@ -2495,10 +2518,6 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)  {  } -static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) -{ -} -  static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)  {  	return NULL; @@ -2587,8 +2606,7 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)  }  /* - * Bind the grace-period kthread for the sysidle flavor of RCU to the - * timekeeping CPU. + * Bind the RCU grace-period kthreads to the housekeeping CPU.   
*/  static void rcu_bind_gp_kthread(void)  { diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 68fa19a5e7bd..4c230a60ece4 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -226,54 +226,6 @@ core_initcall(rcu_set_runtime_mode);  #endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */ -#ifdef CONFIG_PREEMPT_RCU - -/* - * Preemptible RCU implementation for rcu_read_lock(). - * Just increment ->rcu_read_lock_nesting, shared state will be updated - * if we block. - */ -void __rcu_read_lock(void) -{ -	current->rcu_read_lock_nesting++; -	barrier();  /* critical section after entry code. */ -} -EXPORT_SYMBOL_GPL(__rcu_read_lock); - -/* - * Preemptible RCU implementation for rcu_read_unlock(). - * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost - * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then - * invoke rcu_read_unlock_special() to clean up after a context switch - * in an RCU read-side critical section and other special cases. - */ -void __rcu_read_unlock(void) -{ -	struct task_struct *t = current; - -	if (t->rcu_read_lock_nesting != 1) { -		--t->rcu_read_lock_nesting; -	} else { -		barrier();  /* critical section before exit code. */ -		t->rcu_read_lock_nesting = INT_MIN; -		barrier();  /* assign before ->rcu_read_unlock_special load */ -		if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s))) -			rcu_read_unlock_special(t); -		barrier();  /* ->rcu_read_unlock_special load before assign */ -		t->rcu_read_lock_nesting = 0; -	} -#ifdef CONFIG_PROVE_LOCKING -	{ -		int rrln = READ_ONCE(t->rcu_read_lock_nesting); - -		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); -	} -#endif /* #ifdef CONFIG_PROVE_LOCKING */ -} -EXPORT_SYMBOL_GPL(__rcu_read_unlock); - -#endif /* #ifdef CONFIG_PREEMPT_RCU */ -  #ifdef CONFIG_DEBUG_LOCK_ALLOC  static struct lock_class_key rcu_lock_key;  struct lockdep_map rcu_lock_map = @@ -624,7 +576,7 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks);   * grace period has elapsed, in other words after all currently   * executing rcu-tasks read-side critical sections have elapsed.  These   * read-side critical sections are delimited by calls to schedule(), - * cond_resched_rcu_qs(), idle execution, userspace execution, calls + * cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls   * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().   *   * This is a very specialized primitive, intended only for a few uses in diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 211890edf37e..e27034bd954e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5025,20 +5025,6 @@ int __cond_resched_lock(spinlock_t *lock)  }  EXPORT_SYMBOL(__cond_resched_lock); -int __sched __cond_resched_softirq(void) -{ -	BUG_ON(!in_softirq()); - -	if (should_resched(SOFTIRQ_DISABLE_OFFSET)) { -		local_bh_enable(); -		preempt_schedule_common(); -		local_bh_disable(); -		return 1; -	} -	return 0; -} -EXPORT_SYMBOL(__cond_resched_softirq); -  /**   * yield - yield the current processor to other threads.   
* diff --git a/kernel/softirq.c b/kernel/softirq.c index 177de3640c78..03981f1c39ea 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -145,8 +145,7 @@ static void __local_bh_enable(unsigned int cnt)  }  /* - * Special-case - softirqs can safely be enabled in - * cond_resched_softirq(), or by __do_softirq(), + * Special-case - softirqs can safely be enabled by __do_softirq(),   * without processing still-pending softirqs:   */  void _local_bh_enable(void) diff --git a/kernel/torture.c b/kernel/torture.c index 37b94012a3f8..3de1efbecd6a 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -574,7 +574,7 @@ void stutter_wait(const char *title)  {  	int spt; -	cond_resched_rcu_qs(); +	cond_resched_tasks_rcu_qs();  	spt = READ_ONCE(stutter_pause_test);  	for (; spt; spt = READ_ONCE(stutter_pause_test)) {  		if (spt == 1) { diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c index 22fee766081b..80e0b2aca703 100644 --- a/kernel/trace/trace_benchmark.c +++ b/kernel/trace/trace_benchmark.c @@ -159,13 +159,13 @@ static int benchmark_event_kthread(void *arg)  		 * wants to run, schedule in, but if the CPU is idle,  		 * we'll keep burning cycles.  		 * -		 * Note the _rcu_qs() version of cond_resched() will +		 * Note the tasks_rcu_qs() version of cond_resched() will  		 * notify synchronize_rcu_tasks() that this thread has  		 * passed a quiescent state for rcu_tasks. Otherwise  		 * this thread will never voluntarily schedule which would  		 * block synchronize_rcu_tasks() indefinitely.  		 */ -		cond_resched(); +		cond_resched_tasks_rcu_qs();  	}  	return 0; diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh new file mode 100755 index 000000000000..98f650c9bf54 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# +# Invoke a text editor on all console.log files for all runs with diagnostics, +# that is, on all such files having a console.log.diags counterpart. +# Note that both console.log.diags and console.log are passed to the +# editor (currently defaulting to "vi"), allowing the user to get an +# idea of what to search for in the console.log file. +# +# Usage: kvm-find-errors.sh directory +# +# The "directory" above should end with the date/time directory, for example, +# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27". + +rundir="${1}" +if test -z "$rundir" -o ! -d "$rundir" +then +	echo Usage: $0 directory +fi +editor=${EDITOR-vi} + +# Find builds with errors +files= +for i in ${rundir}/*/Make.out +do +	if egrep -q "error:|warning:" < $i +	then +		egrep "error:|warning:" < $i > $i.diags +		files="$files $i.diags $i" +	fi +done +if test -n "$files" +then +	$editor $files +else +	echo No build errors. +fi +if grep -q -e "--buildonly" < ${rundir}/log +then +	echo Build-only run, no console logs to check. +fi + +# Find console logs with errors +files= +for i in ${rundir}/*/console.log +do +	if test -r $i.diags +	then +		files="$files $i.diags $i" +	fi +done +if test -n "$files" +then +	$editor $files +else +	echo No errors in console logs. 
+fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index c2e1bb6d0cba..477ecb1293ab 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -34,11 +34,15 @@ fi  configfile=`echo $i | sed -e 's/^.*\///'`  ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` +stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null | +	    tail -1 | sed -e 's/^\[[ 0-9.]*] //' | +	    awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' | +	    tr -d '\012\015'`"  if test -z "$ngps"  then -	echo "$configfile -------" +	echo "$configfile ------- " $stopstate  else -	title="$configfile ------- $ngps grace periods" +	title="$configfile ------- $ngps GPs"  	dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`  	if test -z "$dur"  	then @@ -46,9 +50,9 @@ else  	else  		ngpsps=`awk -v ngps=$ngps -v dur=$dur '  			BEGIN { print ngps / dur }' < /dev/null` -		title="$title ($ngpsps per second)" +		title="$title ($ngpsps/s)"  	fi -	echo $title +	echo $title $stopstate  	nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`  	if test -z "$nclosecalls"  	then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index f7e988f369dd..c27e97824163 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -48,10 +48,6 @@ do  				cat $i/Make.oldconfig.err  			fi  			parse-build.sh $i/Make.out $configfile -			if test "$TORTURE_SUITE" != rcuperf -			then -				parse-torture.sh $i/console.log $configfile -			fi  			parse-console.sh $i/console.log $configfile  			if test -r $i/Warnings  			then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 5f8fbb0d7c17..c5b0f94341d9 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -267,5 +267,4 @@ then  	echo Unknown PID, cannot kill qemu command  fi -parse-torture.sh $resdir/console.log $title  parse-console.sh $resdir/console.log $title diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 08aa7d50ae0e..17293436f551 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -24,57 +24,146 @@  #  # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> +T=${TMPDIR-/tmp}/parse-console.sh.$$  file="$1"  title="$2" +trap 'rm -f $T.seq $T.diags' 0 +  . functions.sh +# Check for presence and readability of console output file +if test -f "$file" -a -r "$file" +then +	: +else +	echo $title unreadable console output file: $file +	exit 1 +fi  if grep -Pq '\x00' < $file  then  	print_warning Console output contains nul bytes, old qemu still running?  fi -egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? 
Writer stall state|rcu_.*kthread starved for' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags -if test -s $1.diags +cat /dev/null > $file.diags + +# Check for proper termination, except that rcuperf runs don't indicate this. +if test "$TORTURE_SUITE" != rcuperf  then -	print_warning Assertion failure in $file $title -	# cat $1.diags +	# check for abject failure + +	if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file +	then +		nerrs=`grep --binary-files=text '!!!' $file | +		tail -1 | +		awk ' +		{ +			for (i=NF-8;i<=NF;i++) +				sum+=$i; +		} +		END { print sum }'` +		print_bug $title FAILURE, $nerrs instances +		exit +	fi + +	grep --binary-files=text 'torture:.*ver:' $file | +	egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | +	sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' | +	awk ' +	BEGIN	{ +		ver = 0; +		badseq = 0; +		} + +		{ +		if (!badseq && ($5 + 0 != $5 || $5 <= ver)) { +			badseqno1 = ver; +			badseqno2 = $5; +			badseqnr = NR; +			badseq = 1; +		} +		ver = $5 +		} + +	END	{ +		if (badseq) { +			if (badseqno1 == badseqno2 && badseqno2 == ver) +				print "GP HANG at " ver " torture stat " badseqnr; +			else +				print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr; +		} +		}' > $T.seq + +	if grep -q SUCCESS $file +	then +		if test -s $T.seq +		then +			print_warning $title `cat $T.seq` +			echo "   " $file +			exit 2 +		fi +	else +		if grep -q "_HOTPLUG:" $file +		then +			print_warning HOTPLUG FAILURES $title `cat $T.seq` +			echo "   " $file +			exit 3 +		fi +		echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages +		if test -s $T.seq +		then +			print_warning $title `cat $T.seq` +		fi +		exit 2 +	fi +fi | tee -a $file.diags + +egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | +grep -v 'ODEBUG: ' | +grep -v 'Warning: unable to open an initial console' > $T.diags +if test -s $T.diags +then +	print_warning "Assertion failure in $file $title" +	# cat $T.diags  	summary="" -	n_badness=`grep -c Badness $1` +	n_badness=`grep -c Badness $file`  	if test "$n_badness" -ne 0  	then  		summary="$summary  Badness: $n_badness"  	fi -	n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'` +	n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'`  	if test "$n_warn" -ne 0  	then  		summary="$summary  Warnings: $n_warn"  	fi -	n_bugs=`egrep -c 'BUG|Oops:' $1` +	n_bugs=`egrep -c 'BUG|Oops:' $file`  	if test "$n_bugs" -ne 0  	then  		summary="$summary  Bugs: $n_bugs"  	fi -	n_calltrace=`grep -c 'Call Trace:' $1` +	n_calltrace=`grep -c 'Call Trace:' $file`  	if test "$n_calltrace" -ne 0  	then  		summary="$summary  Call Traces: $n_calltrace"  	fi -	n_lockdep=`grep -c =========== $1` +	n_lockdep=`grep -c =========== $file`  	if test "$n_badness" -ne 0  	then  		summary="$summary  lockdep: $n_badness"  	fi -	n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1` +	n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? 
Writer stall state' $file`  	if test "$n_stalls" -ne 0  	then  		summary="$summary  Stalls: $n_stalls"  	fi -	n_starves=`grep -c 'rcu_.*kthread starved for' $1` +	n_starves=`grep -c 'rcu_.*kthread starved for' $file`  	if test "$n_starves" -ne 0  	then  		summary="$summary  Starves: $n_starves"  	fi  	print_warning Summary: $summary -else -	rm $1.diags +	cat $T.diags >> $file.diags +fi +if ! test -s $file.diags +then +	rm -f $file.diags  fi diff --git a/tools/testing/selftests/rcutorture/bin/parse-torture.sh b/tools/testing/selftests/rcutorture/bin/parse-torture.sh deleted file mode 100755 index 5987e50cfeb4..000000000000 --- a/tools/testing/selftests/rcutorture/bin/parse-torture.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash -# -# Check the console output from a torture run for goodness. -# The "file" is a pathname on the local system, and "title" is -# a text string for error-message purposes. -# -# The file must contain torture output, but can be interspersed -# with other dmesg text, as in console-log output. -# -# Usage: parse-torture.sh file title -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# Copyright (C) IBM Corporation, 2011 -# -# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> - -T=${TMPDIR-/tmp}/parse-torture.sh.$$ -file="$1" -title="$2" - -trap 'rm -f $T.seq' 0 - -. functions.sh - -# check for presence of torture output file. - -if test -f "$file" -a -r "$file" -then -	: -else -	echo $title unreadable torture output file: $file -	exit 1 -fi - -# check for abject failure - -if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file -then -	nerrs=`grep --binary-files=text '!!!' $file | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'` -	print_bug $title FAILURE, $nerrs instances -	echo "   " $url -	exit -fi - -grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' | -awk ' -BEGIN	{ -	ver = 0; -	badseq = 0; -	} - -	{ -	if (!badseq && ($5 + 0 != $5 || $5 <= ver)) { -		badseqno1 = ver; -		badseqno2 = $5; -		badseqnr = NR; -		badseq = 1; -	} -	ver = $5 -	} - -END	{ -	if (badseq) { -		if (badseqno1 == badseqno2 && badseqno2 == ver) -			print "GP HANG at " ver " torture stat " badseqnr; -		else -			print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr; -	} -	}' > $T.seq - -if grep -q SUCCESS $file -then -	if test -s $T.seq -	then -		print_warning $title $title `cat $T.seq` -		echo "   " $file -		exit 2 -	fi -else -	if grep -q "_HOTPLUG:" $file -	then -		print_warning HOTPLUG FAILURES $title `cat $T.seq` -		echo "   " $file -		exit 3 -	fi -	echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages -	if test -s $T.seq -	then -		print_warning $title `cat $T.seq` -	fi -	exit 2 -fi | 
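
The torture-version sanity check that the deleted parse-torture.sh used to perform now lives in the awk program embedded in parse-console.sh earlier in this patch. As a rough, stand-alone model of what that awk script decides, the hypothetical check_versions() below walks an array of already-extracted "ver:" numbers (the non-numeric-field test in the real script is omitted) and distinguishes a one-off ordering glitch ("BAD SEQ") from a version number that never advances again, which the script reports as a grace-period hang.

/*
 * User-space model of the "ver:" sequence check folded into
 * parse-console.sh by this patch.  Illustrative only: the function and
 * sample data are not part of the kernel or of the rcutorture scripts.
 */
#include <stdio.h>

static void check_versions(const long *ver, int n)
{
	long last = 0, badseqno1 = 0, badseqno2 = 0;
	int badseqnr = 0, badseq = 0, i;

	for (i = 0; i < n; i++) {
		if (!badseq && ver[i] <= last) {
			badseqno1 = last;	/* value before the glitch */
			badseqno2 = ver[i];	/* value at the glitch */
			badseqnr = i + 1;	/* 1-based, like awk's NR */
			badseq = 1;
		}
		last = ver[i];
	}
	if (!badseq)
		printf("version sequence OK, final ver %ld\n", last);
	else if (badseqno1 == badseqno2 && badseqno2 == last)
		printf("GP HANG at %ld torture stat %d\n", last, badseqnr);
	else
		printf("BAD SEQ %ld:%ld last:%ld version %d\n",
		       badseqno1, badseqno2, last, badseqnr);
}

int main(void)
{
	const long hang[]   = { 1, 2, 3, 3, 3, 3 };	/* stuck at 3 */
	const long glitch[] = { 1, 2, 2, 3, 4, 5 };	/* dips once, recovers */

	check_versions(hang, sizeof(hang) / sizeof(hang[0]));
	check_versions(glitch, sizeof(glitch) / sizeof(glitch[0]));
	return 0;
}

Run against the two sample arrays in main(), this prints a GP HANG for the sequence that gets stuck at 3 and a BAD SEQ for the one that dips once and then keeps advancing, mirroring the two messages the shell script writes into $file.diags.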

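Further back, this patch also moves __rcu_read_lock() and __rcu_read_unlock() from kernel/rcu/update.c into kernel/rcu/tree_plugin.h. The following stand-alone sketch models that read-side nesting logic in plain user-space C; struct model_task, model_read_lock(), model_read_unlock(), and model_read_unlock_special() are illustrative stand-ins rather than kernel APIs, and the point is only to show why the outermost rcu_read_unlock() parks the nesting count at INT_MIN while it checks ->rcu_read_unlock_special.

/*
 * User-space model of the preemptible-RCU read-side nesting logic that
 * this patch relocates into kernel/rcu/tree_plugin.h.  The task_struct
 * fields are replaced by a plain struct and the special-case handler by
 * a stub that merely records that it ran.  Illustrative only.
 */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

struct model_task {
	int rcu_read_lock_nesting;	/* models t->rcu_read_lock_nesting */
	unsigned int special;		/* models t->rcu_read_unlock_special.s */
	bool special_handled;
};

static void model_read_unlock_special(struct model_task *t)
{
	t->special_handled = true;	/* kernel would report QS, unboost, etc. */
	t->special = 0;
}

static void model_read_lock(struct model_task *t)
{
	t->rcu_read_lock_nesting++;
}

static void model_read_unlock(struct model_task *t)
{
	if (t->rcu_read_lock_nesting != 1) {
		/* Inner unlock: just decrement, no special processing. */
		--t->rcu_read_lock_nesting;
	} else {
		/*
		 * Outermost unlock: park the counter at INT_MIN so that code
		 * running from the special-case handler still sees an
		 * in-progress (negative) nesting count, then drop to zero.
		 */
		t->rcu_read_lock_nesting = INT_MIN;
		if (t->special)
			model_read_unlock_special(t);
		t->rcu_read_lock_nesting = 0;
	}
}

int main(void)
{
	struct model_task t = { .special = 1 };

	model_read_lock(&t);	/* outer critical section */
	model_read_lock(&t);	/* nested critical section */
	model_read_unlock(&t);	/* inner: 2 -> 1, no special work */
	model_read_unlock(&t);	/* outer: special work runs exactly once */

	printf("nesting=%d special_handled=%d\n",
	       t.rcu_read_lock_nesting, t.special_handled);
	return 0;
}

The INT_MIN sentinel keeps the count negative so that anything invoked from the special-case path still appears to be inside a read-side critical section; the CONFIG_PROVE_LOCKING check in the patched __rcu_read_unlock() then flags a negative count that is nowhere near that sentinel, which would suggest an unbalanced rcu_read_unlock().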