diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/events/uprobes.c | 12 | ||||
-rw-r--r-- | kernel/locking/locktorture.c | 108 | ||||
-rw-r--r-- | kernel/locking/qspinlock.c | 12 | ||||
-rw-r--r-- | kernel/rcu/rcu.h | 27 | ||||
-rw-r--r-- | kernel/rcu/rcuperf.c | 6 | ||||
-rw-r--r-- | kernel/rcu/rcutorture.c | 12 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 350 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 5 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 2 | ||||
-rw-r--r-- | kernel/sched/core.c | 3 | ||||
-rw-r--r-- | kernel/sched/rt.c | 2 | ||||
-rw-r--r-- | kernel/softirq.c | 2 | ||||
-rw-r--r-- | kernel/torture.c | 39 | ||||
-rw-r--r-- | kernel/trace/trace.c | 11 | ||||
-rw-r--r-- | kernel/trace/trace_benchmark.c | 2 | ||||
-rw-r--r-- | kernel/tracepoint.c | 9 | ||||
-rw-r--r-- | kernel/workqueue.c | 2 |
17 files changed, 234 insertions, 370 deletions
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 267f6ef91d97..ce6848e46e94 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1167,8 +1167,8 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) } ret = 0; - smp_wmb(); /* pairs with get_xol_area() */ - mm->uprobes_state.xol_area = area; + /* pairs with get_xol_area() */ + smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */ fail: up_write(&mm->mmap_sem); @@ -1230,8 +1230,8 @@ static struct xol_area *get_xol_area(void) if (!mm->uprobes_state.xol_area) __create_xol_area(0); - area = mm->uprobes_state.xol_area; - smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ + /* Pairs with xol_add_vma() smp_store_release() */ + area = READ_ONCE(mm->uprobes_state.xol_area); /* ^^^ */ return area; } @@ -1528,8 +1528,8 @@ static unsigned long get_trampoline_vaddr(void) struct xol_area *area; unsigned long trampoline_vaddr = -1; - area = current->mm->uprobes_state.xol_area; - smp_read_barrier_depends(); + /* Pairs with xol_add_vma() smp_store_release() */ + area = READ_ONCE(current->mm->uprobes_state.xol_area); /* ^^^ */ if (area) trampoline_vaddr = area->vaddr; diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index f24582d4dad3..6850ffd69125 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -77,10 +77,6 @@ struct lock_stress_stats { long n_lock_acquired; }; -int torture_runnable = IS_ENABLED(MODULE); -module_param(torture_runnable, int, 0444); -MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init"); - /* Forward reference. */ static void lock_torture_cleanup(void); @@ -130,10 +126,8 @@ static void torture_lock_busted_write_delay(struct torture_random_state *trsp) if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2000 * longdelay_ms))) mdelay(longdelay_ms); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_lock_busted_write_unlock(void) @@ -179,10 +173,8 @@ static void torture_spin_lock_write_delay(struct torture_random_state *trsp) if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2 * shortdelay_us))) udelay(shortdelay_us); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock) @@ -352,10 +344,8 @@ static void torture_mutex_delay(struct torture_random_state *trsp) mdelay(longdelay_ms * 5); else mdelay(longdelay_ms / 5); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_mutex_unlock(void) __releases(torture_mutex) @@ -507,10 +497,8 @@ static void torture_rtmutex_delay(struct torture_random_state *trsp) if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2 * shortdelay_us))) udelay(shortdelay_us); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_rtmutex_unlock(void) __releases(torture_rtmutex) @@ -547,10 +535,8 @@ static void torture_rwsem_write_delay(struct torture_random_state *trsp) mdelay(longdelay_ms * 10); else mdelay(longdelay_ms / 10); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_rwsem_up_write(void) __releases(torture_rwsem) @@ -570,14 +556,12 @@ static void torture_rwsem_read_delay(struct torture_random_state *trsp) /* We want a long delay occasionally to force massive contention. */ if (!(torture_random(trsp) % - (cxt.nrealwriters_stress * 2000 * longdelay_ms))) + (cxt.nrealreaders_stress * 2000 * longdelay_ms))) mdelay(longdelay_ms * 2); else mdelay(longdelay_ms / 2); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealreaders_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_rwsem_up_read(void) __releases(torture_rwsem) @@ -715,8 +699,7 @@ static void __torture_print_stats(char *page, { bool fail = 0; int i, n_stress; - long max = 0; - long min = statp[0].n_lock_acquired; + long max = 0, min = statp ? statp[0].n_lock_acquired : 0; long long sum = 0; n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress; @@ -823,7 +806,7 @@ static void lock_torture_cleanup(void) * such, only perform the underlying torture-specific cleanups, * and avoid anything related to locktorture. */ - if (!cxt.lwsa) + if (!cxt.lwsa && !cxt.lrsa) goto end; if (writer_tasks) { @@ -879,7 +862,7 @@ static int __init lock_torture_init(void) &percpu_rwsem_lock_ops, }; - if (!torture_init_begin(torture_type, verbose, &torture_runnable)) + if (!torture_init_begin(torture_type, verbose)) return -EBUSY; /* Process args and tell the world that the torturer is on the job. */ @@ -898,6 +881,13 @@ static int __init lock_torture_init(void) firsterr = -EINVAL; goto unwind; } + + if (nwriters_stress == 0 && nreaders_stress == 0) { + pr_alert("lock-torture: must run at least one locking thread\n"); + firsterr = -EINVAL; + goto unwind; + } + if (cxt.cur_ops->init) cxt.cur_ops->init(); @@ -921,17 +911,19 @@ static int __init lock_torture_init(void) #endif /* Initialize the statistics so that each run gets its own numbers. */ + if (nwriters_stress) { + lock_is_write_held = 0; + cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL); + if (cxt.lwsa == NULL) { + VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory"); + firsterr = -ENOMEM; + goto unwind; + } - lock_is_write_held = 0; - cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL); - if (cxt.lwsa == NULL) { - VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory"); - firsterr = -ENOMEM; - goto unwind; - } - for (i = 0; i < cxt.nrealwriters_stress; i++) { - cxt.lwsa[i].n_lock_fail = 0; - cxt.lwsa[i].n_lock_acquired = 0; + for (i = 0; i < cxt.nrealwriters_stress; i++) { + cxt.lwsa[i].n_lock_fail = 0; + cxt.lwsa[i].n_lock_acquired = 0; + } } if (cxt.cur_ops->readlock) { @@ -948,19 +940,21 @@ static int __init lock_torture_init(void) cxt.nrealreaders_stress = cxt.nrealwriters_stress; } - lock_is_read_held = 0; - cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL); - if (cxt.lrsa == NULL) { - VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory"); - firsterr = -ENOMEM; - kfree(cxt.lwsa); - cxt.lwsa = NULL; - goto unwind; - } - - for (i = 0; i < cxt.nrealreaders_stress; i++) { - cxt.lrsa[i].n_lock_fail = 0; - cxt.lrsa[i].n_lock_acquired = 0; + if (nreaders_stress) { + lock_is_read_held = 0; + cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL); + if (cxt.lrsa == NULL) { + VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory"); + firsterr = -ENOMEM; + kfree(cxt.lwsa); + cxt.lwsa = NULL; + goto unwind; + } + + for (i = 0; i < cxt.nrealreaders_stress; i++) { + cxt.lrsa[i].n_lock_fail = 0; + cxt.lrsa[i].n_lock_acquired = 0; + } } } @@ -990,12 +984,14 @@ static int __init lock_torture_init(void) goto unwind; } - writer_tasks = kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]), - GFP_KERNEL); - if (writer_tasks == NULL) { - VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); - firsterr = -ENOMEM; - goto unwind; + if (nwriters_stress) { + writer_tasks = kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]), + GFP_KERNEL); + if (writer_tasks == NULL) { + VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); + firsterr = -ENOMEM; + goto unwind; + } } if (cxt.cur_ops->readlock) { diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 294294c71ba4..38ece035039e 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -170,7 +170,7 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) * @tail : The new queue tail code word * Return: The previous queue tail code word * - * xchg(lock, tail) + * xchg(lock, tail), which heads an address dependency * * p,*,* -> n,*,* ; prev = xchg(lock, node) */ @@ -409,13 +409,11 @@ queue: if (old & _Q_TAIL_MASK) { prev = decode_tail(old); /* - * The above xchg_tail() is also a load of @lock which generates, - * through decode_tail(), a pointer. - * - * The address dependency matches the RELEASE of xchg_tail() - * such that the access to @prev must happen after. + * The above xchg_tail() is also a load of @lock which + * generates, through decode_tail(), a pointer. The address + * dependency matches the RELEASE of xchg_tail() such that + * the subsequent access to @prev happens after. */ - smp_read_barrier_depends(); WRITE_ONCE(prev->next, node); diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 59c471de342a..6334f2c1abd0 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -30,31 +30,8 @@ #define RCU_TRACE(stmt) #endif /* #else #ifdef CONFIG_RCU_TRACE */ -/* - * Process-level increment to ->dynticks_nesting field. This allows for - * architectures that use half-interrupts and half-exceptions from - * process context. - * - * DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH - * that counts the number of process-based reasons why RCU cannot - * consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE - * is the value used to increment or decrement this field. - * - * The rest of the bits could in principle be used to count interrupts, - * but this would mean that a negative-one value in the interrupt - * field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field. - * We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK - * that is set to DYNTICK_TASK_FLAG upon initial exit from idle. - * The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon - * initial exit from idle. - */ -#define DYNTICK_TASK_NEST_WIDTH 7 -#define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1) -#define DYNTICK_TASK_NEST_MASK (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1) -#define DYNTICK_TASK_FLAG ((DYNTICK_TASK_NEST_VALUE / 8) * 2) -#define DYNTICK_TASK_MASK ((DYNTICK_TASK_NEST_VALUE / 8) * 3) -#define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \ - DYNTICK_TASK_FLAG) +/* Offset to allow for unmatched rcu_irq_{enter,exit}(). */ +#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) /* diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index 1f87a02c3399..d1ebdf9868bb 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -106,10 +106,6 @@ static int rcu_perf_writer_state; #define MAX_MEAS 10000 #define MIN_MEAS 100 -static int perf_runnable = IS_ENABLED(MODULE); -module_param(perf_runnable, int, 0444); -MODULE_PARM_DESC(perf_runnable, "Start rcuperf at boot"); - /* * Operations vector for selecting different types of tests. */ @@ -646,7 +642,7 @@ rcu_perf_init(void) &tasks_ops, }; - if (!torture_init_begin(perf_type, verbose, &perf_runnable)) + if (!torture_init_begin(perf_type, verbose)) return -EBUSY; /* Process args and tell the world that the perf'er is on the job. */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 74f6b0146b98..308e6fdbced8 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -187,10 +187,6 @@ static const char *rcu_torture_writer_state_getname(void) return rcu_torture_writer_state_names[i]; } -static int torture_runnable = IS_ENABLED(MODULE); -module_param(torture_runnable, int, 0444); -MODULE_PARM_DESC(torture_runnable, "Start rcutorture at boot"); - #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) #define rcu_can_boost() 1 #else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ @@ -315,11 +311,9 @@ static void rcu_read_delay(struct torture_random_state *rrsp) } if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) udelay(shortdelay_us); -#ifdef CONFIG_PREEMPT if (!preempt_count() && - !(torture_random(rrsp) % (nrealreaders * 20000))) - preempt_schedule(); /* No QS if preempt_disable() in effect */ -#endif + !(torture_random(rrsp) % (nrealreaders * 500))) + torture_preempt_schedule(); /* QS only if preemptible. */ } static void rcu_torture_read_unlock(int idx) __releases(RCU) @@ -1731,7 +1725,7 @@ rcu_torture_init(void) &sched_ops, &tasks_ops, }; - if (!torture_init_begin(torture_type, verbose, &torture_runnable)) + if (!torture_init_begin(torture_type, verbose)) return -EBUSY; /* Process args and tell the world that the torturer is on the job. */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1bdc0481aaf1..491bdf39f276 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -265,25 +265,12 @@ void rcu_bh_qs(void) #endif static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { - .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, + .dynticks_nesting = 1, + .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), }; /* - * There's a few places, currently just in the tracing infrastructure, - * that uses rcu_irq_enter() to make sure RCU is watching. But there's - * a small location where that will not even work. In those cases - * rcu_irq_enter_disabled() needs to be checked to make sure rcu_irq_enter() - * can be called. - */ -static DEFINE_PER_CPU(bool, disable_rcu_irq_enter); - -bool rcu_irq_enter_disabled(void) -{ - return this_cpu_read(disable_rcu_irq_enter); -} - -/* * Record entry into an extended quiescent state. This is only to be * called when not already in an extended quiescent state. */ @@ -762,68 +749,39 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) } /* - * rcu_eqs_enter_common - current CPU is entering an extended quiescent state + * Enter an RCU extended quiescent state, which can be either the + * idle loop or adaptive-tickless usermode execution. * - * Enter idle, doing appropriate accounting. The caller must have - * disabled interrupts. + * We crowbar the ->dynticks_nmi_nesting field to zero to allow for + * the possibility of usermode upcalls having messed up our count + * of interrupt nesting level during the prior busy period. */ -static void rcu_eqs_enter_common(bool user) +static void rcu_eqs_enter(bool user) { struct rcu_state *rsp; struct rcu_data *rdp; - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + struct rcu_dynticks *rdtp; - lockdep_assert_irqs_disabled(); - trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0); - if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - !user && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = - idle_task(smp_processor_id()); - - trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0); - rcu_ftrace_dump(DUMP_ORIG); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! */ + rdtp = this_cpu_ptr(&rcu_dynticks); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + rdtp->dynticks_nesting == 0); + if (rdtp->dynticks_nesting != 1) { + rdtp->dynticks_nesting--; + return; } + + lockdep_assert_irqs_disabled(); + trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); for_each_rcu_flavor(rsp) { rdp = this_cpu_ptr(rsp->rda); do_nocb_deferred_wakeup(rdp); } rcu_prepare_for_idle(); - __this_cpu_inc(disable_rcu_irq_enter); - rdtp->dynticks_nesting = 0; /* Breaks tracing momentarily. */ - rcu_dynticks_eqs_enter(); /* After this, tracing works again. */ - __this_cpu_dec(disable_rcu_irq_enter); + WRITE_ONCE(rdtp->dynticks_nesting, 0); /* Avoid irq-access tearing. */ + rcu_dynticks_eqs_enter(); rcu_dynticks_task_enter(); - - /* - * It is illegal to enter an extended quiescent state while - * in an RCU read-side critical section. - */ - RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map), - "Illegal idle entry in RCU read-side critical section."); - RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), - "Illegal idle entry in RCU-bh read-side critical section."); - RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), - "Illegal idle entry in RCU-sched read-side critical section."); -} - -/* - * Enter an RCU extended quiescent state, which can be either the - * idle loop or adaptive-tickless usermode execution. - */ -static void rcu_eqs_enter(bool user) -{ - struct rcu_dynticks *rdtp; - - rdtp = this_cpu_ptr(&rcu_dynticks); - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - (rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); - if ((rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) - rcu_eqs_enter_common(user); - else - rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; } /** @@ -834,10 +792,6 @@ static void rcu_eqs_enter(bool user) * critical sections can occur in irq handlers in idle, a possibility * handled by irq_enter() and irq_exit().) * - * We crowbar the ->dynticks_nesting field to zero to allow for - * the possibility of usermode upcalls having messed up our count - * of interrupt nesting level during the prior busy period. - * * If you add or remove a call to rcu_idle_enter(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. */ @@ -867,6 +821,46 @@ void rcu_user_enter(void) #endif /* CONFIG_NO_HZ_FULL */ /** + * rcu_nmi_exit - inform RCU of exit from NMI context + * + * If we are returning from the outermost NMI handler that interrupted an + * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting + * to let the RCU grace-period handling know that the CPU is back to + * being RCU-idle. + * + * If you add or remove a call to rcu_nmi_exit(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. + */ +void rcu_nmi_exit(void) +{ + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + + /* + * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. + * (We are exiting an NMI handler, so RCU better be paying attention + * to us!) + */ + WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0); + WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()); + + /* + * If the nesting level is not 1, the CPU wasn't RCU-idle, so + * leave it in non-RCU-idle state. + */ + if (rdtp->dynticks_nmi_nesting != 1) { + trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, rdtp->dynticks_nmi_nesting - 2, rdtp->dynticks); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ + rdtp->dynticks_nmi_nesting - 2); + return; + } + + /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ + trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0, rdtp->dynticks); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ + rcu_dynticks_eqs_enter(); +} + +/** * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle * * Exit from an interrupt handler, which might possibly result in entering @@ -875,8 +869,8 @@ void rcu_user_enter(void) * * This code assumes that the idle loop never does anything that might * result in unbalanced calls to irq_enter() and irq_exit(). If your - * architecture violates this assumption, RCU will give you what you - * deserve, good and hard. But very infrequently and irreproducibly. + * architecture's idle loop violates this assumption, RCU will give you what + * you deserve, good and hard. But very infrequently and irreproducibly. * * Use things like work queues to work around this limitation. * @@ -887,23 +881,14 @@ void rcu_user_enter(void) */ void rcu_irq_exit(void) { - struct rcu_dynticks *rdtp; + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); lockdep_assert_irqs_disabled(); - rdtp = this_cpu_ptr(&rcu_dynticks); - - /* Page faults can happen in NMI handlers, so check... */ - if (rdtp->dynticks_nmi_nesting) - return; - - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - rdtp->dynticks_nesting < 1); - if (rdtp->dynticks_nesting <= 1) { - rcu_eqs_enter_common(true); - } else { - trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1); - rdtp->dynticks_nesting--; - } + if (rdtp->dynticks_nmi_nesting == 1) + rcu_prepare_for_idle(); + rcu_nmi_exit(); + if (rdtp->dynticks_nmi_nesting == 0) + rcu_dynticks_task_enter(); } /* @@ -922,55 +907,33 @@ void rcu_irq_exit_irqson(void) } /* - * rcu_eqs_exit_common - current CPU moving away from extended quiescent state - * - * If the new value of the ->dynticks_nesting counter was previously zero, - * we really have exited idle, and must do the appropriate accounting. - * The caller must have disabled interrupts. - */ -static void rcu_eqs_exit_common(long long oldval, int user) -{ - RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);) - - rcu_dynticks_task_exit(); - rcu_dynticks_eqs_exit(); - rcu_cleanup_after_idle(); - trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); - if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - !user && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = - idle_task(smp_processor_id()); - - trace_rcu_dyntick(TPS("Error on exit: not idle task"), - oldval, rdtp->dynticks_nesting); - rcu_ftrace_dump(DUMP_ORIG); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! */ - } -} - -/* * Exit an RCU extended quiescent state, which can be either the * idle loop or adaptive-tickless usermode execution. + * + * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to + * allow for the possibility of usermode upcalls messing up our count of + * interrupt nesting level during the busy period that is just now starting. */ static void rcu_eqs_exit(bool user) { struct rcu_dynticks *rdtp; - long long oldval; + long oldval; lockdep_assert_irqs_disabled(); rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); - if (oldval & DYNTICK_TASK_NEST_MASK) { - rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; - } else { - __this_cpu_inc(disable_rcu_irq_enter); - rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; - rcu_eqs_exit_common(oldval, user); - __this_cpu_dec(disable_rcu_irq_enter); + if (oldval) { + rdtp->dynticks_nesting++; + return; } + rcu_dynticks_task_exit(); + rcu_dynticks_eqs_exit(); + rcu_cleanup_after_idle(); + trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, 1, rdtp->dynticks); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); + WRITE_ONCE(rdtp->dynticks_nesting, 1); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } /** @@ -979,11 +942,6 @@ static void rcu_eqs_exit(bool user) * Exit idle mode, in other words, -enter- the mode in which RCU * read-side critical sections can occur. * - * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to - * allow for the possibility of usermode upcalls messing up our count - * of interrupt nesting level during the busy period that is just - * now starting. - * * If you add or remove a call to rcu_idle_exit(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. */ @@ -1013,65 +971,6 @@ void rcu_user_exit(void) #endif /* CONFIG_NO_HZ_FULL */ /** - * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle - * - * Enter an interrupt handler, which might possibly result in exiting - * idle mode, in other words, entering the mode in which read-side critical - * sections can occur. The caller must have disabled interrupts. - * - * Note that the Linux kernel is fully capable of entering an interrupt - * handler that it never exits, for example when doing upcalls to - * user mode! This code assumes that the idle loop never does upcalls to - * user mode. If your architecture does do upcalls from the idle loop (or - * does anything else that results in unbalanced calls to the irq_enter() - * and irq_exit() functions), RCU will give you what you deserve, good - * and hard. But very infrequently and irreproducibly. - * - * Use things like work queues to work around this limitation. - * - * You have been warned. - * - * If you add or remove a call to rcu_irq_enter(), be sure to test with - * CONFIG_RCU_EQS_DEBUG=y. - */ -void rcu_irq_enter(void) -{ - struct rcu_dynticks *rdtp; - long long oldval; - - lockdep_assert_irqs_disabled(); - rdtp = this_cpu_ptr(&rcu_dynticks); - - /* Page faults can happen in NMI handlers, so check... */ - if (rdtp->dynticks_nmi_nesting) - return; - - oldval = rdtp->dynticks_nesting; - rdtp->dynticks_nesting++; - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - rdtp->dynticks_nesting == 0); - if (oldval) - trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting); - else - rcu_eqs_exit_common(oldval, true); -} - -/* - * Wrapper for rcu_irq_enter() where interrupts are enabled. - * - * If you add or remove a call to rcu_irq_enter_irqson(), be sure to test - * with CONFIG_RCU_EQS_DEBUG=y. - */ -void rcu_irq_enter_irqson(void) -{ - unsigned long flags; - - local_irq_save(flags); - rcu_irq_enter(); - local_irq_restore(flags); -} - -/** * rcu_nmi_enter - inform RCU of entry to NMI context * * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and @@ -1086,7 +985,7 @@ void rcu_irq_enter_irqson(void) void rcu_nmi_enter(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - int incby = 2; + long incby = 2; /* Complain about underflow. */ WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); @@ -1103,45 +1002,61 @@ void rcu_nmi_enter(void) rcu_dynticks_eqs_exit(); incby = 1; } - rdtp->dynticks_nmi_nesting += incby; + trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="), + rdtp->dynticks_nmi_nesting, + rdtp->dynticks_nmi_nesting + incby, rdtp->dynticks); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ + rdtp->dynticks_nmi_nesting + incby); barrier(); } /** - * rcu_nmi_exit - inform RCU of exit from NMI context + * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle * - * If we are returning from the outermost NMI handler that interrupted an - * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting - * to let the RCU grace-period handling know that the CPU is back to - * being RCU-idle. + * Enter an interrupt handler, which might possibly result in exiting + * idle mode, in other words, entering the mode in which read-side critical + * sections can occur. The caller must have disabled interrupts. * - * If you add or remove a call to rcu_nmi_exit(), be sure to test - * with CONFIG_RCU_EQS_DEBUG=y. + * Note that the Linux kernel is fully capable of entering an interrupt + * handler that it never exits, for example when doing upcalls to user mode! + * This code assumes that the idle loop never does upcalls to user mode. + * If your architecture's idle loop does do upcalls to user mode (or does + * anything else that results in unbalanced calls to the irq_enter() and + * irq_exit() functions), RCU will give you what you deserve, good and hard. + * But very infrequently and irreproducibly. + * + * Use things like work queues to work around this limitation. + * + * You have been warned. + * + * If you add or remove a call to rcu_irq_enter(), be sure to test with + * CONFIG_RCU_EQS_DEBUG=y. */ -void rcu_nmi_exit(void) +void rcu_irq_enter(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - /* - * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. - * (We are exiting an NMI handler, so RCU better be paying attention - * to us!) - */ - WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0); - WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()); + lockdep_assert_irqs_disabled(); + if (rdtp->dynticks_nmi_nesting == 0) + rcu_dynticks_task_exit(); + rcu_nmi_enter(); + if (rdtp->dynticks_nmi_nesting == 1) + rcu_cleanup_after_idle(); +} - /* - * If the nesting level is not 1, the CPU wasn't RCU-idle, so - * leave it in non-RCU-idle state. - */ - if (rdtp->dynticks_nmi_nesting != 1) { - rdtp->dynticks_nmi_nesting -= 2; - return; - } +/* + * Wrapper for rcu_irq_enter() where interrupts are enabled. + * + * If you add or remove a call to rcu_irq_enter_irqson(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. + */ +void rcu_irq_enter_irqson(void) +{ + unsigned long flags; - /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ - rdtp->dynticks_nmi_nesting = 0; - rcu_dynticks_eqs_enter(); + local_irq_save(flags); + rcu_irq_enter(); + local_irq_restore(flags); } /** @@ -1233,7 +1148,8 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); */ static int rcu_is_cpu_rrupt_from_idle(void) { - return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1; + return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 && + __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1; } /* @@ -3728,7 +3644,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) raw_spin_lock_irqsave_rcu_node(rnp, flags); rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu); rdp->dynticks = &per_cpu(rcu_dynticks, cpu); - WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); + WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1); WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks))); rdp->cpu = cpu; rdp->rsp = rsp; @@ -3757,7 +3673,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */ !init_nocb_callback_list(rdp)) rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */ - rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; + rdp->dynticks->dynticks_nesting = 1; /* CPU not up, no tearing. */ rcu_dynticks_eqs_online(); raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 46a5d1991450..6488a3b0e729 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -38,9 +38,8 @@ * Dynticks per-CPU state. */ struct rcu_dynticks { - long long dynticks_nesting; /* Track irq/process nesting level. */ - /* Process level is worth LLONG_MAX/2. */ - int dynticks_nmi_nesting; /* Track NMI nesting level. */ + long dynticks_nesting; /* Track process nesting level. */ + long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 13a8e08f1998..fb88a028deec 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1686,7 +1686,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) } print_cpu_stall_fast_no_hz(fast_no_hz, cpu); delta = rdp->mynode->gpnum - rdp->rcu_iw_gpnum; - pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n", + pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%ld softirq=%u/%u fqs=%ld %s\n", cpu, "O."[!!cpu_online(cpu)], "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 75554f366fd3..c85dfb746f8c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -508,7 +508,8 @@ void resched_cpu(int cpu) unsigned long flags; raw_spin_lock_irqsave(&rq->lock, flags); - resched_curr(rq); + if (cpu_online(cpu) || cpu == smp_processor_id()) + resched_curr(rq); raw_spin_unlock_irqrestore(&rq->lock, flags); } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 4056c19ca3f0..f242f642ef53 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2206,7 +2206,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) if (p->nr_cpus_allowed > 1 && rq->rt.overloaded) queue_push_tasks(rq); #endif /* CONFIG_SMP */ - if (p->prio < rq->curr->prio) + if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq))) resched_curr(rq); } } diff --git a/kernel/softirq.c b/kernel/softirq.c index 2f5e87f1bae2..24d243ef8e71 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -665,7 +665,7 @@ static void run_ksoftirqd(unsigned int cpu) */ __do_softirq(); local_irq_enable(); - cond_resched_rcu_qs(); + cond_resched(); return; } local_irq_enable(); diff --git a/kernel/torture.c b/kernel/torture.c index 637e172835d8..37b94012a3f8 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -47,6 +47,7 @@ #include <linux/ktime.h> #include <asm/byteorder.h> #include <linux/torture.h> +#include "rcu/rcu.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>"); @@ -60,7 +61,6 @@ static bool verbose; #define FULLSTOP_RMMOD 2 /* Normal rmmod of torture. */ static int fullstop = FULLSTOP_RMMOD; static DEFINE_MUTEX(fullstop_mutex); -static int *torture_runnable; #ifdef CONFIG_HOTPLUG_CPU @@ -500,7 +500,7 @@ static int torture_shutdown(void *arg) torture_shutdown_hook(); else VERBOSE_TOROUT_STRING("No torture_shutdown_hook(), skipping."); - ftrace_dump(DUMP_ALL); + rcu_ftrace_dump(DUMP_ALL); kernel_power_off(); /* Shut down the system. */ return 0; } @@ -572,17 +572,19 @@ static int stutter; */ void stutter_wait(const char *title) { + int spt; + cond_resched_rcu_qs(); - while (READ_ONCE(stutter_pause_test) || - (torture_runnable && !READ_ONCE(*torture_runnable))) { - if (stutter_pause_test) - if (READ_ONCE(stutter_pause_test) == 1) - schedule_timeout_interruptible(1); - else - while (READ_ONCE(stutter_pause_test)) - cond_resched(); - else + spt = READ_ONCE(stutter_pause_test); + for (; spt; spt = READ_ONCE(stutter_pause_test)) { + if (spt == 1) { + schedule_timeout_interruptible(1); + } else if (spt == 2) { + while (READ_ONCE(stutter_pause_test)) + cond_resched(); + } else { schedule_timeout_interruptible(round_jiffies_relative(HZ)); + } torture_shutdown_absorb(title); } } @@ -596,17 +598,15 @@ static int torture_stutter(void *arg) { VERBOSE_TOROUT_STRING("torture_stutter task started"); do { - if (!torture_must_stop()) { - if (stutter > 1) { - schedule_timeout_interruptible(stutter - 1); - WRITE_ONCE(stutter_pause_test, 2); - } - schedule_timeout_interruptible(1); + if (!torture_must_stop() && stutter > 1) { WRITE_ONCE(stutter_pause_test, 1); + schedule_timeout_interruptible(stutter - 1); + WRITE_ONCE(stutter_pause_test, 2); + schedule_timeout_interruptible(1); } + WRITE_ONCE(stutter_pause_test, 0); if (!torture_must_stop()) schedule_timeout_interruptible(stutter); - WRITE_ONCE(stutter_pause_test, 0); torture_shutdown_absorb("torture_stutter"); } while (!torture_must_stop()); torture_kthread_stopping("torture_stutter"); @@ -647,7 +647,7 @@ static void torture_stutter_cleanup(void) * The runnable parameter points to a flag that controls whether or not * the test is currently runnable. If there is no such flag, pass in NULL. */ -bool torture_init_begin(char *ttype, bool v, int *runnable) +bool torture_init_begin(char *ttype, bool v) { mutex_lock(&fullstop_mutex); if (torture_type != NULL) { @@ -659,7 +659,6 @@ bool torture_init_begin(char *ttype, bool v, int *runnable) } torture_type = ttype; verbose = v; - torture_runnable = runnable; fullstop = FULLSTOP_DONTSTOP; return true; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 73e67b68c53b..dbce1be3bab8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2682,17 +2682,6 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, if (unlikely(in_nmi())) return; - /* - * It is possible that a function is being traced in a - * location that RCU is not watching. A call to - * rcu_irq_enter() will make sure that it is, but there's - * a few internal rcu functions that could be traced - * where that wont work either. In those cases, we just - * do nothing. - */ - if (unlikely(rcu_irq_enter_disabled())) - return; - rcu_irq_enter_irqson(); __ftrace_trace_stack(buffer, flags, skip, pc, NULL); rcu_irq_exit_irqson(); diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c index 79f838a75077..22fee766081b 100644 --- a/kernel/trace/trace_benchmark.c +++ b/kernel/trace/trace_benchmark.c @@ -165,7 +165,7 @@ static int benchmark_event_kthread(void *arg) * this thread will never voluntarily schedule which would * block synchronize_rcu_tasks() indefinitely. */ - cond_resched_rcu_qs(); + cond_resched(); } return 0; diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 685c50ae6300..671b13457387 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -212,11 +212,10 @@ static int tracepoint_add_func(struct tracepoint *tp, } /* - * rcu_assign_pointer has a smp_wmb() which makes sure that the new - * probe callbacks array is consistent before setting a pointer to it. - * This array is referenced by __DO_TRACE from - * include/linux/tracepoints.h. A matching smp_read_barrier_depends() - * is used. + * rcu_assign_pointer has as smp_store_release() which makes sure + * that the new probe callbacks array is consistent before setting + * a pointer to it. This array is referenced by __DO_TRACE from + * include/linux/tracepoint.h using rcu_dereference_sched(). */ rcu_assign_pointer(tp->funcs, tp_funcs); if (!static_key_enabled(&tp->key)) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8fdb710bfdd7..aee7eaab05cb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2135,7 +2135,7 @@ __acquires(&pool->lock) * stop_machine. At the same time, report a quiescent RCU state so * the same condition doesn't freeze RCU. */ - cond_resched_rcu_qs(); + cond_resched(); spin_lock_irq(&pool->lock); |