summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/entry/common.c4
-rw-r--r--kernel/events/core.c12
-rw-r--r--kernel/fork.c10
-rw-r--r--kernel/futex.c20
-rw-r--r--kernel/hung_task.c3
-rw-r--r--kernel/irq/Kconfig1
-rw-r--r--kernel/kprobes.c25
-rw-r--r--kernel/kthread.c3
-rw-r--r--kernel/locking/lockdep.c20
-rw-r--r--kernel/params.c2
-rw-r--r--kernel/power/process.c2
-rw-r--r--kernel/printk/printk_ringbuffer.c2
-rw-r--r--kernel/rcu/tree.c2
-rw-r--r--kernel/sched/cpufreq_schedutil.c18
-rw-r--r--kernel/signal.c19
-rw-r--r--kernel/stop_machine.c2
-rw-r--r--kernel/time/hrtimer.c5
-rw-r--r--kernel/time/itimer.c4
-rw-r--r--kernel/time/sched_clock.c4
-rw-r--r--kernel/time/timer.c5
-rw-r--r--kernel/trace/ring_buffer.c58
-rw-r--r--kernel/trace/trace.c6
-rw-r--r--kernel/trace/trace.h26
-rw-r--r--kernel/trace/trace_events_synth.c53
-rw-r--r--kernel/trace/trace_selftest.c9
-rw-r--r--kernel/tracepoint.c2
26 files changed, 196 insertions, 121 deletions
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 2b8366693d5c..e9e2df3f3f9e 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -337,10 +337,10 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
* already contains a warning when RCU is not watching, so no point
* in having another one here.
*/
+ lockdep_hardirqs_off(CALLER_ADDR0);
instrumentation_begin();
rcu_irq_enter_check_tick();
- /* Use the combo lockdep/tracing function */
- trace_hardirqs_off();
+ trace_hardirqs_off_finish();
instrumentation_end();
return ret;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index da467e1dd49a..5a29ab09e72d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10085,6 +10085,7 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
int fpos = token == IF_SRC_FILE ? 2 : 1;
+ kfree(filename);
filename = match_strdup(&args[fpos]);
if (!filename) {
ret = -ENOMEM;
@@ -10131,16 +10132,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
*/
ret = -EOPNOTSUPP;
if (!event->ctx->task)
- goto fail_free_name;
+ goto fail;
/* look up the path and grab its inode */
ret = kern_path(filename, LOOKUP_FOLLOW,
&filter->path);
if (ret)
- goto fail_free_name;
-
- kfree(filename);
- filename = NULL;
+ goto fail;
ret = -EINVAL;
if (!filter->path.dentry ||
@@ -10160,13 +10158,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
if (state != IF_STATE_ACTION)
goto fail;
+ kfree(filename);
kfree(orig);
return 0;
-fail_free_name:
- kfree(filename);
fail:
+ kfree(filename);
free_filters_list(filters);
kfree(orig);
diff --git a/kernel/fork.c b/kernel/fork.c
index 32083db7a2a2..6d266388d380 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2167,14 +2167,9 @@ static __latent_entropy struct task_struct *copy_process(
/* ok, now we should be set up.. */
p->pid = pid_nr(pid);
if (clone_flags & CLONE_THREAD) {
- p->exit_signal = -1;
p->group_leader = current->group_leader;
p->tgid = current->tgid;
} else {
- if (clone_flags & CLONE_PARENT)
- p->exit_signal = current->group_leader->exit_signal;
- else
- p->exit_signal = args->exit_signal;
p->group_leader = p;
p->tgid = p->pid;
}
@@ -2218,9 +2213,14 @@ static __latent_entropy struct task_struct *copy_process(
if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
p->real_parent = current->real_parent;
p->parent_exec_id = current->parent_exec_id;
+ if (clone_flags & CLONE_THREAD)
+ p->exit_signal = -1;
+ else
+ p->exit_signal = current->group_leader->exit_signal;
} else {
p->real_parent = current;
p->parent_exec_id = current->self_exec_id;
+ p->exit_signal = args->exit_signal;
}
klp_copy_process(p);
diff --git a/kernel/futex.c b/kernel/futex.c
index be68ac0d49ad..ac328874f6e5 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1503,8 +1503,10 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
*/
newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
- if (unlikely(should_fail_futex(true)))
+ if (unlikely(should_fail_futex(true))) {
ret = -EFAULT;
+ goto out_unlock;
+ }
ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
if (!ret && (curval != uval)) {
@@ -2378,10 +2380,22 @@ retry:
}
/*
- * Since we just failed the trylock; there must be an owner.
+ * The trylock just failed, so either there is an owner or
+ * there is a higher priority waiter than this one.
*/
newowner = rt_mutex_owner(&pi_state->pi_mutex);
- BUG_ON(!newowner);
+ /*
+ * If the higher priority waiter has not yet taken over the
+ * rtmutex then newowner is NULL. We can't return here with
+ * that state because it's inconsistent vs. the user space
+ * state. So drop the locks and try again. It's a valid
+ * situation and not any different from the other retry
+ * conditions.
+ */
+ if (unlikely(!newowner)) {
+ err = -EAGAIN;
+ goto handle_err;
+ }
} else {
WARN_ON_ONCE(argowner != current);
if (oldowner == current) {
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index ce76f490126c..396ebaebea3f 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -225,8 +225,7 @@ static long hung_timeout_jiffies(unsigned long last_checked,
* Process updating of timeout sysctl
*/
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 10a5aff4eecc..164a031cfdb6 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -82,6 +82,7 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS
# Generic IRQ IPI support
config GENERIC_IRQ_IPI
bool
+ select IRQ_DOMAIN_HIERARCHY
# Generic MSI interrupt support
config GENERIC_MSI_IRQ
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 8a12a25fa40d..41fdbb7953c6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1249,7 +1249,13 @@ __acquires(hlist_lock)
*head = &kretprobe_inst_table[hash];
hlist_lock = kretprobe_table_lock_ptr(hash);
- raw_spin_lock_irqsave(hlist_lock, *flags);
+ /*
+ * Nested is a workaround that will soon not be needed.
+ * There's other protections that make sure the same lock
+ * is not taken on the same CPU that lockdep is unaware of.
+ * Differentiate when it is taken in NMI context.
+ */
+ raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
}
NOKPROBE_SYMBOL(kretprobe_hash_lock);
@@ -1258,7 +1264,13 @@ static void kretprobe_table_lock(unsigned long hash,
__acquires(hlist_lock)
{
raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
- raw_spin_lock_irqsave(hlist_lock, *flags);
+ /*
+ * Nested is a workaround that will soon not be needed.
+ * There's other protections that make sure the same lock
+ * is not taken on the same CPU that lockdep is unaware of.
+ * Differentiate when it is taken in NMI context.
+ */
+ raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
}
NOKPROBE_SYMBOL(kretprobe_table_lock);
@@ -2028,7 +2040,12 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
/* TODO: consider to only swap the RA after the last pre_handler fired */
hash = hash_ptr(current, KPROBE_HASH_BITS);
- raw_spin_lock_irqsave(&rp->lock, flags);
+ /*
+ * Nested is a workaround that will soon not be needed.
+ * There's other protections that make sure the same lock
+ * is not taken on the same CPU that lockdep is unaware of.
+ */
+ raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
if (!hlist_empty(&rp->free_instances)) {
ri = hlist_entry(rp->free_instances.first,
struct kretprobe_instance, hlist);
@@ -2039,7 +2056,7 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
ri->task = current;
if (rp->entry_handler && rp->entry_handler(ri, regs)) {
- raw_spin_lock_irqsave(&rp->lock, flags);
+ raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
hlist_add_head(&ri->hlist, &rp->free_instances);
raw_spin_unlock_irqrestore(&rp->lock, flags);
return 0;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index e29773c82b70..933a625621b8 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -897,7 +897,8 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
/* Move the work from worker->delayed_work_list. */
WARN_ON_ONCE(list_empty(&work->node));
list_del_init(&work->node);
- kthread_insert_work(worker, work, &worker->work_list);
+ if (!work->canceling)
+ kthread_insert_work(worker, work, &worker->work_list);
raw_spin_unlock_irqrestore(&worker->lock, flags);
}
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 3e99dfef8408..b71ad8d9f1c9 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -84,7 +84,7 @@ static inline bool lockdep_enabled(void)
if (!debug_locks)
return false;
- if (raw_cpu_read(lockdep_recursion))
+ if (this_cpu_read(lockdep_recursion))
return false;
if (current->lockdep_recursion)
@@ -4057,7 +4057,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
if (unlikely(in_nmi()))
return;
- if (unlikely(__this_cpu_read(lockdep_recursion)))
+ if (unlikely(this_cpu_read(lockdep_recursion)))
return;
if (unlikely(lockdep_hardirqs_enabled())) {
@@ -4126,7 +4126,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
goto skip_checks;
}
- if (unlikely(__this_cpu_read(lockdep_recursion)))
+ if (unlikely(this_cpu_read(lockdep_recursion)))
return;
if (lockdep_hardirqs_enabled()) {
@@ -4396,6 +4396,9 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
if (unlikely(hlock_class(this)->usage_mask & new_mask))
goto unlock;
+ if (!hlock_class(this)->usage_mask)
+ debug_atomic_dec(nr_unused_locks);
+
hlock_class(this)->usage_mask |= new_mask;
if (new_bit < LOCK_TRACE_STATES) {
@@ -4403,19 +4406,10 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
return 0;
}
- switch (new_bit) {
- case 0 ... LOCK_USED-1:
+ if (new_bit < LOCK_USED) {
ret = mark_lock_irq(curr, this, new_bit);
if (!ret)
return 0;
- break;
-
- case LOCK_USED:
- debug_atomic_dec(nr_unused_locks);
- break;
-
- default:
- break;
}
unlock:
diff --git a/kernel/params.c b/kernel/params.c
index 3835fb82c64b..164d79330849 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -530,7 +530,7 @@ struct module_param_attrs
{
unsigned int num;
struct attribute_group grp;
- struct param_attribute attrs[0];
+ struct param_attribute attrs[];
};
#ifdef CONFIG_SYSFS
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 4b6a54da7e65..45b054b7b5ec 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -146,7 +146,7 @@ int freeze_processes(void)
BUG_ON(in_atomic());
/*
- * Now that the whole userspace is frozen we need to disbale
+ * Now that the whole userspace is frozen we need to disable
* the OOM killer to disallow any further interference with
* killable tasks. There is no guarantee oom victims will
* ever reach a point they go away we have to wait with a timeout.
diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
index 24a960a89aa8..6b1525685277 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -345,7 +345,7 @@ DESC_ID((id) - DESCS_COUNT(desc_ring))
*/
struct prb_data_block {
unsigned long id;
- char data[0];
+ char data[];
};
/*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 06895ef85d69..2a52f42f64b6 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -409,7 +409,7 @@ bool rcu_eqs_special_set(int cpu)
*
* The caller must have disabled interrupts and must not be idle.
*/
-void rcu_momentary_dyntick_idle(void)
+notrace void rcu_momentary_dyntick_idle(void)
{
int special;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index e254745a82cb..d73bccde2720 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -102,8 +102,12 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
unsigned int next_freq)
{
- if (sg_policy->next_freq == next_freq)
- return false;
+ if (!sg_policy->need_freq_update) {
+ if (sg_policy->next_freq == next_freq)
+ return false;
+ } else {
+ sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
+ }
sg_policy->next_freq = next_freq;
sg_policy->last_freq_update_time = time;
@@ -164,7 +168,6 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
return sg_policy->next_freq;
- sg_policy->need_freq_update = false;
sg_policy->cached_raw_freq = freq;
return cpufreq_driver_resolve_freq(policy, freq);
}
@@ -440,7 +443,6 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned long util, max;
unsigned int next_f;
- bool busy;
unsigned int cached_freq = sg_policy->cached_raw_freq;
sugov_iowait_boost(sg_cpu, time, flags);
@@ -451,9 +453,6 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
if (!sugov_should_update_freq(sg_policy, time))
return;
- /* Limits may have changed, don't skip frequency update */
- busy = !sg_policy->need_freq_update && sugov_cpu_is_busy(sg_cpu);
-
util = sugov_get_util(sg_cpu);
max = sg_cpu->max;
util = sugov_iowait_apply(sg_cpu, time, util, max);
@@ -462,7 +461,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
* Do not reduce the frequency if the CPU has not been idle
* recently, as the reduction is likely to be premature then.
*/
- if (busy && next_f < sg_policy->next_freq) {
+ if (sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
next_f = sg_policy->next_freq;
/* Restore cached freq as next_freq has changed */
@@ -827,9 +826,10 @@ static int sugov_start(struct cpufreq_policy *policy)
sg_policy->next_freq = 0;
sg_policy->work_in_progress = false;
sg_policy->limits_changed = false;
- sg_policy->need_freq_update = false;
sg_policy->cached_raw_freq = 0;
+ sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
+
for_each_cpu(cpu, policy->cpus) {
struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
diff --git a/kernel/signal.c b/kernel/signal.c
index a38b3edc6851..ef8f2a28d37c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -391,16 +391,17 @@ static bool task_participate_group_stop(struct task_struct *task)
void task_join_group_stop(struct task_struct *task)
{
+ unsigned long mask = current->jobctl & JOBCTL_STOP_SIGMASK;
+ struct signal_struct *sig = current->signal;
+
+ if (sig->group_stop_count) {
+ sig->group_stop_count++;
+ mask |= JOBCTL_STOP_CONSUME;
+ } else if (!(sig->flags & SIGNAL_STOP_STOPPED))
+ return;
+
/* Have the new thread join an on-going signal group stop */
- unsigned long jobctl = current->jobctl;
- if (jobctl & JOBCTL_STOP_PENDING) {
- struct signal_struct *sig = current->signal;
- unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK;
- unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;
- if (task_set_jobctl_pending(task, signr | gstop)) {
- sig->group_stop_count++;
- }
- }
+ task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING);
}
/*
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 865bb0228ab6..890b79cf0e7c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -178,7 +178,7 @@ static void ack_state(struct multi_stop_data *msdata)
set_state(msdata, msdata->state + 1);
}
-void __weak stop_machine_yield(const struct cpumask *cpumask)
+notrace void __weak stop_machine_yield(const struct cpumask *cpumask)
{
cpu_relax();
}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 3624b9b5835d..387b4bef7dd1 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -425,11 +425,6 @@ static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
debug_object_deactivate(timer, &hrtimer_debug_descr);
}
-static inline void debug_hrtimer_free(struct hrtimer *timer)
-{
- debug_object_free(timer, &hrtimer_debug_descr);
-}
-
static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
enum hrtimer_mode mode);
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index ca4e6d57d68b..00629e658ca1 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -172,10 +172,6 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
u64 oval, nval, ointerval, ninterval;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
- /*
- * Use the to_ktime conversion because that clamps the maximum
- * value to KTIME_MAX and avoid multiplication overflows.
- */
nval = timespec64_to_ns(&value->it_value);
ninterval = timespec64_to_ns(&value->it_interval);
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 0642013dace4..b1b9b12899f5 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -68,13 +68,13 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
return (cyc * mult) >> shift;
}
-struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
+notrace struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
{
*seq = raw_read_seqcount_latch(&cd.seq);
return cd.read_data + (*seq & 1);
}
-int sched_clock_read_retry(unsigned int seq)
+notrace int sched_clock_read_retry(unsigned int seq)
{
return read_seqcount_latch_retry(&cd.seq, seq);
}
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index de37e33a868d..c3ad64fb9d8b 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -732,11 +732,6 @@ static inline void debug_timer_deactivate(struct timer_list *timer)
debug_object_deactivate(timer, &timer_debug_descr);
}
-static inline void debug_timer_free(struct timer_list *timer)
-{
- debug_object_free(timer, &timer_debug_descr);
-}
-
static inline void debug_timer_assert_init(struct timer_list *timer)
{
debug_object_assert_init(timer, &timer_debug_descr);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7f45fd9d5a45..dc83b3fa9fe7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -438,14 +438,16 @@ enum {
};
/*
* Used for which event context the event is in.
- * NMI = 0
- * IRQ = 1
- * SOFTIRQ = 2
- * NORMAL = 3
+ * TRANSITION = 0
+ * NMI = 1
+ * IRQ = 2
+ * SOFTIRQ = 3
+ * NORMAL = 4
*
* See trace_recursive_lock() comment below for more details.
*/
enum {
+ RB_CTX_TRANSITION,
RB_CTX_NMI,
RB_CTX_IRQ,
RB_CTX_SOFTIRQ,
@@ -3014,10 +3016,10 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* a bit of overhead in something as critical as function tracing,
* we use a bitmask trick.
*
- * bit 0 = NMI context
- * bit 1 = IRQ context
- * bit 2 = SoftIRQ context
- * bit 3 = normal context.
+ * bit 1 = NMI context
+ * bit 2 = IRQ context
+ * bit 3 = SoftIRQ context
+ * bit 4 = normal context.
*
* This works because this is the order of contexts that can
* preempt other contexts. A SoftIRQ never preempts an IRQ
@@ -3040,6 +3042,30 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* The least significant bit can be cleared this way, and it
* just so happens that it is the same bit corresponding to
* the current context.
+ *
+ * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
+ * is set when a recursion is detected at the current context, and if
+ * the TRANSITION bit is already set, it will fail the recursion.
+ * This is needed because there's a lag between the changing of
+ * interrupt context and updating the preempt count. In this case,
+ * a false positive will be found. To handle this, one extra recursion
+ * is allowed, and this is done by the TRANSITION bit. If the TRANSITION
+ * bit is already set, then it is considered a recursion and the function
+ * ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
+ *
+ * On the trace_recursive_unlock(), the TRANSITION bit will be the first
+ * to be cleared. Even if it wasn't the context that set it. That is,
+ * if an interrupt comes in while NORMAL bit is set and the ring buffer
+ * is called before preempt_count() is updated, since the check will
+ * be on the NORMAL bit, the TRANSITION bit will then be set. If an
+ * NMI then comes in, it will set the NMI bit, but when the NMI code
+ * does the trace_recursive_unlock() it will clear the TRANSTION bit
+ * and leave the NMI bit set. But this is fine, because the interrupt
+ * code that set the TRANSITION bit will then clear the NMI bit when it
+ * calls trace_recursive_unlock(). If another NMI comes in, it will
+ * set the TRANSITION bit and continue.
+ *
+ * Note: The TRANSITION bit only handles a single transition between context.
*/
static __always_inline int
@@ -3055,8 +3081,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
bit = pc & NMI_MASK ? RB_CTX_NMI :
pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
- if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
- return 1;
+ if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) {
+ /*
+ * It is possible that this was called by transitioning
+ * between interrupt context, and preempt_count() has not
+ * been updated yet. In this case, use the TRANSITION bit.
+ */
+ bit = RB_CTX_TRANSITION;
+ if (val & (1 << (bit + cpu_buffer->nest)))
+ return 1;
+ }
val |= (1 << (bit + cpu_buffer->nest));
cpu_buffer->current_context = val;
@@ -3071,8 +3105,8 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->current_context - (1 << cpu_buffer->nest);
}
-/* The recursive locking above uses 4 bits */
-#define NESTED_BITS 4
+/* The recursive locking above uses 5 bits */
+#define NESTED_BITS 5
/**
* ring_buffer_nest_start - Allow to trace while nested
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 528971714fc6..410cfeb16db5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2750,7 +2750,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
/*
* If tracing is off, but we have triggers enabled
* we still need to look at the event data. Use the temp_buffer
- * to store the trace event for the tigger to use. It's recusive
+ * to store the trace event for the trigger to use. It's recursive
* safe and will not be recorded anywhere.
*/
if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
@@ -2952,7 +2952,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
/* This should never happen. If it does, yell once and skip */
- if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
+ if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
goto out;
/*
@@ -3132,7 +3132,7 @@ static char *get_trace_buf(void)
/* Interrupts must see nesting incremented before we use the buffer */
barrier();
- return &buffer->buffer[buffer->nesting][0];
+ return &buffer->buffer[buffer->nesting - 1][0];
}
static void put_trace_buf(void)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f3f5e77123ad..1dadef445cd1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -637,6 +637,12 @@ enum {
* function is called to clear it.
*/
TRACE_GRAPH_NOTRACE_BIT,
+
+ /*
+ * When transitioning between context, the preempt_count() may
+ * not be correct. Allow for a single recursion to cover this case.
+ */
+ TRACE_TRANSITION_BIT,
};
#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0)
@@ -691,14 +697,27 @@ static __always_inline int trace_test_and_set_recursion(int start, int max)
return 0;
bit = trace_get_context_bit() + start;
- if (unlikely(val & (1 << bit)))
- return -1;
+ if (unlikely(val & (1 << bit))) {
+ /*
+ * It could be that preempt_count has not been updated during
+ * a switch between contexts. Allow for a single recursion.
+ */
+ bit = TRACE_TRANSITION_BIT;
+ if (trace_recursion_test(bit))
+ return -1;
+ trace_recursion_set(bit);
+ barrier();
+ return bit + 1;
+ }
+
+ /* Normal check passed, clear the transition to allow it again */
+ trace_recursion_clear(TRACE_TRANSITION_BIT);
val |= 1 << bit;
current->trace_recursion = val;
barrier();
- return bit;
+ return bit + 1;
}
static __always_inline void trace_clear_recursion(int bit)
@@ -708,6 +727,7 @@ static __always_inline void trace_clear_recursion(int bit)
if (!bit)
return;
+ bit--;
bit = 1 << bit;
val &= ~bit;
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 3212e2c653b3..881df991742a 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -584,7 +584,8 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
{
struct synth_field *field;
const char *prefix = NULL, *field_type = argv[0], *field_name, *array;
- int len, ret = 0;
+ int len, ret = -ENOMEM;
+ struct seq_buf s;
ssize_t size;
if (field_type[0] == ';')
@@ -616,10 +617,9 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
len--;
field->name = kmemdup_nul(field_name, len, GFP_KERNEL);
- if (!field->name) {
- ret = -ENOMEM;
+ if (!field->name)
goto free;
- }
+
if (!is_good_name(field->name)) {
synth_err(SYNTH_ERR_BAD_NAME, errpos(field_name));
ret = -EINVAL;
@@ -630,29 +630,29 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
field_type++;
len = strlen(field_type) + 1;
- if (array) {
- int l = strlen(array);
+ if (array)
+ len += strlen(array);
- if (l && array[l - 1] == ';')
- l--;
- len += l;
- }
if (prefix)
len += strlen(prefix);
field->type = kzalloc(len, GFP_KERNEL);
- if (!field->type) {
- ret = -ENOMEM;
+ if (!field->type)
goto free;
- }
+
+ seq_buf_init(&s, field->type, len);
if (prefix)
- strcat(field->type, prefix);
- strcat(field->type, field_type);
+ seq_buf_puts(&s, prefix);
+ seq_buf_puts(&s, field_type);
if (array) {
- strcat(field->type, array);
- if (field->type[len - 1] == ';')
- field->type[len - 1] = '\0';
+ seq_buf_puts(&s, array);
+ if (s.buffer[s.len - 1] == ';')
+ s.len--;
}
+ if (WARN_ON_ONCE(!seq_buf_buffer_left(&s)))
+ goto free;
+
+ s.buffer[s.len] = '\0';
size = synth_field_size(field->type);
if (size < 0) {
@@ -663,14 +663,19 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
if (synth_field_is_string(field->type)) {
char *type;
- type = kzalloc(sizeof("__data_loc ") + strlen(field->type) + 1, GFP_KERNEL);
- if (!type) {
- ret = -ENOMEM;
+ len = sizeof("__data_loc ") + strlen(field->type) + 1;
+ type = kzalloc(len, GFP_KERNEL);
+ if (!type)
goto free;
- }
- strcat(type, "__data_loc ");
- strcat(type, field->type);
+ seq_buf_init(&s, type, len);
+ seq_buf_puts(&s, "__data_loc ");
+ seq_buf_puts(&s, field->type);
+
+ if (WARN_ON_ONCE(!seq_buf_buffer_left(&s)))
+ goto free;
+ s.buffer[s.len] = '\0';
+
kfree(field->type);
field->type = type;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index b5e3496cf803..4738ad48a667 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -492,8 +492,13 @@ trace_selftest_function_recursion(void)
unregister_ftrace_function(&test_rec_probe);
ret = -1;
- if (trace_selftest_recursion_cnt != 1) {
- pr_cont("*callback not called once (%d)* ",
+ /*
+ * Recursion allows for transitions between context,
+ * and may call the callback twice.
+ */
+ if (trace_selftest_recursion_cnt != 1 &&
+ trace_selftest_recursion_cnt != 2) {
+ pr_cont("*callback not called once (or twice) (%d)* ",
trace_selftest_recursion_cnt);
goto out;
}
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 26efd22f0633..3f659f855074 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -50,7 +50,7 @@ static bool ok_to_free_tracepoints;
*/
struct tp_probes {
struct rcu_head rcu;
- struct tracepoint_func probes[0];
+ struct tracepoint_func probes[];
};
static inline void *allocate_probes(int count)