summaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-06 13:11:48 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-06 13:11:48 -0700
commit2c6a392cddacde153865b15e8295ad0a35ed3c02 (patch)
tree558c34595f8987c87d26fc0fa0dc644fca9ef2cd /kernel/trace
parent0a499fc5c37e6db096969a83534fd98a2bf2b36c (diff)
parent3599fe12a125fa7118da2bcc5033d7741fb5f3a1 (diff)
Merge branch 'core-stacktrace-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull stack trace updates from Ingo Molnar: "So Thomas looked at the stacktrace code recently and noticed a few weirdnesses, and we all know how such stories of crummy kernel code meeting German engineering perfection end: a 45-patch series to clean it all up! :-) Here's the changes in Thomas's words: 'Struct stack_trace is a sinkhole for input and output parameters which is largely pointless for most usage sites. In fact if embedded into other data structures it creates indirections and extra storage overhead for no benefit. Looking at all usage sites makes it clear that they just require an interface which is based on a storage array. That array is either on stack, global or embedded into some other data structure. Some of the stack depot usage sites are outright wrong, but fortunately the wrongness just causes more stack being used for nothing and does not have functional impact. Another oddity is the inconsistent termination of the stack trace with ULONG_MAX. It's pointless as the number of entries is what determines the length of the stored trace. In fact quite some call sites remove the ULONG_MAX marker afterwards with or without nasty comments about it. Not all architectures do that and those which do, do it inconsistenly either conditional on nr_entries == 0 or unconditionally. The following series cleans that up by: 1) Removing the ULONG_MAX termination in the architecture code 2) Removing the ULONG_MAX fixups at the call sites 3) Providing plain storage array based interfaces for stacktrace and stackdepot. 4) Cleaning up the mess at the callsites including some related cleanups. 5) Removing the struct stack_trace based interfaces This is not changing the struct stack_trace interfaces at the architecture level, but it removes the exposure to the generic code'" * 'core-stacktrace-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits) x86/stacktrace: Use common infrastructure stacktrace: Provide common infrastructure lib/stackdepot: Remove obsolete functions stacktrace: Remove obsolete functions livepatch: Simplify stack trace retrieval tracing: Remove the last struct stack_trace usage tracing: Simplify stack trace retrieval tracing: Make ftrace_trace_userstack() static and conditional tracing: Use percpu stack trace buffer more intelligently tracing: Simplify stacktrace retrieval in histograms lockdep: Simplify stack trace handling lockdep: Remove save argument from check_prev_add() lockdep: Remove unused trace argument from print_circular_bug() drm: Simplify stacktrace handling dm persistent data: Simplify stack trace handling dm bufio: Simplify stack trace retrieval btrfs: ref-verify: Simplify stack trace retrieval dma/debug: Simplify stracktrace retrieval fault-inject: Simplify stacktrace retrieval mm/page_owner: Simplify stack trace handling ...
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/trace.c105
-rw-r--r--kernel/trace/trace.h8
-rw-r--r--kernel/trace/trace_events_hist.c14
-rw-r--r--kernel/trace/trace_stack.c85
4 files changed, 78 insertions, 134 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ca1ee656d6d8..ec439999f387 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -159,6 +159,8 @@ static union trace_eval_map_item *trace_eval_maps;
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
static int tracing_set_tracer(struct trace_array *tr, const char *buf);
+static void ftrace_trace_userstack(struct ring_buffer *buffer,
+ unsigned long flags, int pc);
#define MAX_TRACER_SIZE 100
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
@@ -2752,12 +2754,21 @@ trace_function(struct trace_array *tr,
#ifdef CONFIG_STACKTRACE
-#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
+/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
+#define FTRACE_KSTACK_NESTING 4
+
+#define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
+
struct ftrace_stack {
- unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
+ unsigned long calls[FTRACE_KSTACK_ENTRIES];
+};
+
+
+struct ftrace_stacks {
+ struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
};
-static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
+static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
static void __ftrace_trace_stack(struct ring_buffer *buffer,
@@ -2766,13 +2777,10 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
{
struct trace_event_call *call = &event_kernel_stack;
struct ring_buffer_event *event;
+ unsigned int size, nr_entries;
+ struct ftrace_stack *fstack;
struct stack_entry *entry;
- struct stack_trace trace;
- int use_stack;
- int size = FTRACE_STACK_ENTRIES;
-
- trace.nr_entries = 0;
- trace.skip = skip;
+ int stackidx;
/*
* Add one, for this function and the call to save_stack_trace()
@@ -2780,7 +2788,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
*/
#ifndef CONFIG_UNWINDER_ORC
if (!regs)
- trace.skip++;
+ skip++;
#endif
/*
@@ -2791,53 +2799,40 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
*/
preempt_disable_notrace();
- use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
+ stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
+
+ /* This should never happen. If it does, yell once and skip */
+ if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
+ goto out;
+
/*
- * We don't need any atomic variables, just a barrier.
- * If an interrupt comes in, we don't care, because it would
- * have exited and put the counter back to what we want.
- * We just need a barrier to keep gcc from moving things
- * around.
+ * The above __this_cpu_inc_return() is 'atomic' cpu local. An
+ * interrupt will either see the value pre increment or post
+ * increment. If the interrupt happens pre increment it will have
+ * restored the counter when it returns. We just need a barrier to
+ * keep gcc from moving things around.
*/
barrier();
- if (use_stack == 1) {
- trace.entries = this_cpu_ptr(ftrace_stack.calls);
- trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
- if (regs)
- save_stack_trace_regs(regs, &trace);
- else
- save_stack_trace(&trace);
-
- if (trace.nr_entries > size)
- size = trace.nr_entries;
- } else
- /* From now on, use_stack is a boolean */
- use_stack = 0;
+ fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
+ size = ARRAY_SIZE(fstack->calls);
- size *= sizeof(unsigned long);
+ if (regs) {
+ nr_entries = stack_trace_save_regs(regs, fstack->calls,
+ size, skip);
+ } else {
+ nr_entries = stack_trace_save(fstack->calls, size, skip);
+ }
+ size = nr_entries * sizeof(unsigned long);
event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
sizeof(*entry) + size, flags, pc);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
- memset(&entry->caller, 0, size);
-
- if (use_stack)
- memcpy(&entry->caller, trace.entries,
- trace.nr_entries * sizeof(unsigned long));
- else {
- trace.max_entries = FTRACE_STACK_ENTRIES;
- trace.entries = entry->caller;
- if (regs)
- save_stack_trace_regs(regs, &trace);
- else
- save_stack_trace(&trace);
- }
-
- entry->size = trace.nr_entries;
+ memcpy(&entry->caller, fstack->calls, size);
+ entry->size = nr_entries;
if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
@@ -2907,15 +2902,15 @@ void trace_dump_stack(int skip)
}
EXPORT_SYMBOL_GPL(trace_dump_stack);
+#ifdef CONFIG_USER_STACKTRACE_SUPPORT
static DEFINE_PER_CPU(int, user_stack_count);
-void
+static void
ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
{
struct trace_event_call *call = &event_user_stack;
struct ring_buffer_event *event;
struct userstack_entry *entry;
- struct stack_trace trace;
if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
return;
@@ -2946,12 +2941,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
entry->tgid = current->tgid;
memset(&entry->caller, 0, sizeof(entry->caller));
- trace.nr_entries = 0;
- trace.max_entries = FTRACE_STACK_ENTRIES;
- trace.skip = 0;
- trace.entries = entry->caller;
-
- save_stack_trace_user(&trace);
+ stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
@@ -2960,13 +2950,12 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
out:
preempt_enable();
}
-
-#ifdef UNUSED
-static void __trace_userstack(struct trace_array *tr, unsigned long flags)
+#else /* CONFIG_USER_STACKTRACE_SUPPORT */
+static void ftrace_trace_userstack(struct ring_buffer *buffer,
+ unsigned long flags, int pc)
{
- ftrace_trace_userstack(tr, flags, preempt_count());
}
-#endif /* UNUSED */
+#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
#endif /* CONFIG_STACKTRACE */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d80cee49e0eb..639047b259d7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -782,17 +782,9 @@ void update_max_tr_single(struct trace_array *tr,
#endif /* CONFIG_TRACER_MAX_TRACE */
#ifdef CONFIG_STACKTRACE
-void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
- int pc);
-
void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
int pc);
#else
-static inline void ftrace_trace_userstack(struct ring_buffer *buffer,
- unsigned long flags, int pc)
-{
-}
-
static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
int skip, int pc)
{
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 795aa2038377..a1d20421f4b0 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -5186,7 +5186,6 @@ static void event_hist_trigger(struct event_trigger_data *data, void *rec,
u64 var_ref_vals[TRACING_MAP_VARS_MAX];
char compound_key[HIST_KEY_SIZE_MAX];
struct tracing_map_elt *elt = NULL;
- struct stack_trace stacktrace;
struct hist_field *key_field;
u64 field_contents;
void *key = NULL;
@@ -5198,14 +5197,9 @@ static void event_hist_trigger(struct event_trigger_data *data, void *rec,
key_field = hist_data->fields[i];
if (key_field->flags & HIST_FIELD_FL_STACKTRACE) {
- stacktrace.max_entries = HIST_STACKTRACE_DEPTH;
- stacktrace.entries = entries;
- stacktrace.nr_entries = 0;
- stacktrace.skip = HIST_STACKTRACE_SKIP;
-
- memset(stacktrace.entries, 0, HIST_STACKTRACE_SIZE);
- save_stack_trace(&stacktrace);
-
+ memset(entries, 0, HIST_STACKTRACE_SIZE);
+ stack_trace_save(entries, HIST_STACKTRACE_DEPTH,
+ HIST_STACKTRACE_SKIP);
key = entries;
} else {
field_contents = key_field->fn(key_field, elt, rbe, rec);
@@ -5246,7 +5240,7 @@ static void hist_trigger_stacktrace_print(struct seq_file *m,
unsigned int i;
for (i = 0; i < max_entries; i++) {
- if (stacktrace_entries[i] == ULONG_MAX)
+ if (!stacktrace_entries[i])
return;
seq_printf(m, "%*c", 1 + spaces, ' ');
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index eec648a0d673..5d16f73898db 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -18,44 +18,32 @@
#include "trace.h"
-static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
- { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
-unsigned stack_trace_index[STACK_TRACE_ENTRIES];
+#define STACK_TRACE_ENTRIES 500
-/*
- * Reserve one entry for the passed in ip. This will allow
- * us to remove most or all of the stack size overhead
- * added by the stack tracer itself.
- */
-struct stack_trace stack_trace_max = {
- .max_entries = STACK_TRACE_ENTRIES - 1,
- .entries = &stack_dump_trace[0],
-};
+static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES];
+static unsigned stack_trace_index[STACK_TRACE_ENTRIES];
-unsigned long stack_trace_max_size;
-arch_spinlock_t stack_trace_max_lock =
+static unsigned int stack_trace_nr_entries;
+static unsigned long stack_trace_max_size;
+static arch_spinlock_t stack_trace_max_lock =
(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
DEFINE_PER_CPU(int, disable_stack_tracer);
static DEFINE_MUTEX(stack_sysctl_mutex);
int stack_tracer_enabled;
-static int last_stack_tracer_enabled;
-void stack_trace_print(void)
+static void print_max_stack(void)
{
long i;
int size;
pr_emerg(" Depth Size Location (%d entries)\n"
" ----- ---- --------\n",
- stack_trace_max.nr_entries);
+ stack_trace_nr_entries);
- for (i = 0; i < stack_trace_max.nr_entries; i++) {
- if (stack_dump_trace[i] == ULONG_MAX)
- break;
- if (i+1 == stack_trace_max.nr_entries ||
- stack_dump_trace[i+1] == ULONG_MAX)
+ for (i = 0; i < stack_trace_nr_entries; i++) {
+ if (i + 1 == stack_trace_nr_entries)
size = stack_trace_index[i];
else
size = stack_trace_index[i] - stack_trace_index[i+1];
@@ -65,16 +53,7 @@ void stack_trace_print(void)
}
}
-/*
- * When arch-specific code overrides this function, the following
- * data should be filled up, assuming stack_trace_max_lock is held to
- * prevent concurrent updates.
- * stack_trace_index[]
- * stack_trace_max
- * stack_trace_max_size
- */
-void __weak
-check_stack(unsigned long ip, unsigned long *stack)
+static void check_stack(unsigned long ip, unsigned long *stack)
{
unsigned long this_size, flags; unsigned long *p, *top, *start;
static int tracer_frame;
@@ -110,13 +89,12 @@ check_stack(unsigned long ip, unsigned long *stack)
stack_trace_max_size = this_size;
- stack_trace_max.nr_entries = 0;
- stack_trace_max.skip = 0;
-
- save_stack_trace(&stack_trace_max);
+ stack_trace_nr_entries = stack_trace_save(stack_dump_trace,
+ ARRAY_SIZE(stack_dump_trace) - 1,
+ 0);
/* Skip over the overhead of the stack tracer itself */
- for (i = 0; i < stack_trace_max.nr_entries; i++) {
+ for (i = 0; i < stack_trace_nr_entries; i++) {
if (stack_dump_trace[i] == ip)
break;
}
@@ -125,7 +103,7 @@ check_stack(unsigned long ip, unsigned long *stack)
* Some archs may not have the passed in ip in the dump.
* If that happens, we need to show everything.
*/
- if (i == stack_trace_max.nr_entries)
+ if (i == stack_trace_nr_entries)
i = 0;
/*
@@ -143,15 +121,13 @@ check_stack(unsigned long ip, unsigned long *stack)
* loop will only happen once. This code only takes place
* on a new max, so it is far from a fast path.
*/
- while (i < stack_trace_max.nr_entries) {
+ while (i < stack_trace_nr_entries) {
int found = 0;
stack_trace_index[x] = this_size;
p = start;
- for (; p < top && i < stack_trace_max.nr_entries; p++) {
- if (stack_dump_trace[i] == ULONG_MAX)
- break;
+ for (; p < top && i < stack_trace_nr_entries; p++) {
/*
* The READ_ONCE_NOCHECK is used to let KASAN know that
* this is not a stack-out-of-bounds error.
@@ -182,12 +158,10 @@ check_stack(unsigned long ip, unsigned long *stack)
i++;
}
- stack_trace_max.nr_entries = x;
- for (; x < i; x++)
- stack_dump_trace[x] = ULONG_MAX;
+ stack_trace_nr_entries = x;
if (task_stack_end_corrupted(current)) {
- stack_trace_print();
+ print_max_stack();
BUG();
}
@@ -286,7 +260,7 @@ __next(struct seq_file *m, loff_t *pos)
{
long n = *pos - 1;
- if (n >= stack_trace_max.nr_entries || stack_dump_trace[n] == ULONG_MAX)
+ if (n >= stack_trace_nr_entries)
return NULL;
m->private = (void *)n;
@@ -350,7 +324,7 @@ static int t_show(struct seq_file *m, void *v)
seq_printf(m, " Depth Size Location"
" (%d entries)\n"
" ----- ---- --------\n",
- stack_trace_max.nr_entries);
+ stack_trace_nr_entries);
if (!stack_tracer_enabled && !stack_trace_max_size)
print_disabled(m);
@@ -360,12 +334,10 @@ static int t_show(struct seq_file *m, void *v)
i = *(long *)v;
- if (i >= stack_trace_max.nr_entries ||
- stack_dump_trace[i] == ULONG_MAX)
+ if (i >= stack_trace_nr_entries)
return 0;
- if (i+1 == stack_trace_max.nr_entries ||
- stack_dump_trace[i+1] == ULONG_MAX)
+ if (i + 1 == stack_trace_nr_entries)
size = stack_trace_index[i];
else
size = stack_trace_index[i] - stack_trace_index[i+1];
@@ -422,23 +394,21 @@ stack_trace_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
+ int was_enabled;
int ret;
mutex_lock(&stack_sysctl_mutex);
+ was_enabled = !!stack_tracer_enabled;
ret = proc_dointvec(table, write, buffer, lenp, ppos);
- if (ret || !write ||
- (last_stack_tracer_enabled == !!stack_tracer_enabled))
+ if (ret || !write || (was_enabled == !!stack_tracer_enabled))
goto out;
- last_stack_tracer_enabled = !!stack_tracer_enabled;
-
if (stack_tracer_enabled)
register_ftrace_function(&trace_ops);
else
unregister_ftrace_function(&trace_ops);
-
out:
mutex_unlock(&stack_sysctl_mutex);
return ret;
@@ -454,7 +424,6 @@ static __init int enable_stacktrace(char *str)
strncpy(stack_trace_filter_buf, str + len, COMMAND_LINE_SIZE);
stack_tracer_enabled = 1;
- last_stack_tracer_enabled = 1;
return 1;
}
__setup("stacktrace", enable_stacktrace);