From 8114865ff82e200b383e46821c25cb0625b842b5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:10:15 -0500 Subject: function_graph: Create function_graph_enter() to consolidate architecture code Currently all the architectures do basically the same thing in preparing the function graph tracer on entry to a function. This code can be pulled into a generic location and then this will allow the function graph tracer to be fixed, as well as extended. Create a new function graph helper function_graph_enter() that will call the hook function (ftrace_graph_entry) and the shadow stack operation (ftrace_push_return_trace), and remove the need of the architecture code to manage the shadow stack. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_functions_graph.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 169b3c44ee97..28f2602435d0 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -182,6 +182,22 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, return 0; } +int function_graph_enter(unsigned long ret, unsigned long func, + unsigned long frame_pointer, unsigned long *retp) +{ + struct ftrace_graph_ent trace; + + trace.func = func; + trace.depth = current->curr_ret_stack + 1; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) + return -EBUSY; + + return ftrace_push_return_trace(ret, func, &trace.depth, + frame_pointer, retp); +} + /* Retrieve a function return address to the trace stack on thread info.*/ static void ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, -- cgit From d125f3f866df88da5a85df00291f88f0baa89f7c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 19 Nov 2018 07:40:39 -0500 Subject: function_graph: Make ftrace_push_return_trace() static As all architectures now call function_graph_enter() to do the entry work, no architecture should ever call ftrace_push_return_trace(). Make it static. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_functions_graph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 28f2602435d0..88ca787a1cdc 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -118,7 +118,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration, struct trace_seq *s, u32 flags); /* Add a function return address to the trace stack on thread info.*/ -int +static int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, unsigned long frame_pointer, unsigned long *retp) { -- cgit From 39eb456dacb543de90d3bc6a8e0ac5cf51ac475e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 19 Nov 2018 08:07:12 -0500 Subject: function_graph: Use new curr_ret_depth to manage depth instead of curr_ret_stack Currently, the depth of the ret_stack is determined by curr_ret_stack index. The issue is that there's a race between setting of the curr_ret_stack and calling of the callback attached to the return of the function. Commit 03274a3ffb44 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") moved the calling of the callback to after the setting of the curr_ret_stack, even stating that it was safe to do so, when in fact, it was the reason there was a barrier() there (yes, I should have commented that barrier()). Not only does the curr_ret_stack keep track of the current call graph depth, it also keeps the ret_stack content from being overwritten by new data. The function profiler, uses the "subtime" variable of ret_stack structure and by moving the curr_ret_stack, it allows for interrupts to use the same structure it was using, corrupting the data, and breaking the profiler. To fix this, there needs to be two variables to handle the call stack depth and the pointer to where the ret_stack is being used, as they need to change at two different locations. Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 3 +++ kernel/trace/trace_functions_graph.c | 21 +++++++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f536f601bd46..48513954713c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6814,6 +6814,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); t->curr_ret_stack = -1; + t->curr_ret_depth = -1; /* Make sure the tasks see the -1 first: */ smp_wmb(); t->ret_stack = ret_stack_list[start++]; @@ -7038,6 +7039,7 @@ graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) { t->curr_ret_stack = -1; + t->curr_ret_depth = -1; /* * The idle task has no parent, it either has its own * stack or no stack at all. @@ -7068,6 +7070,7 @@ void ftrace_graph_init_task(struct task_struct *t) /* Make sure we do not use the parent ret_stack */ t->ret_stack = NULL; t->curr_ret_stack = -1; + t->curr_ret_depth = -1; if (ftrace_graph_active) { struct ftrace_ret_stack *ret_stack; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 88ca787a1cdc..02d4081a7f5a 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -119,7 +119,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration, /* Add a function return address to the trace stack on thread info.*/ static int -ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, +ftrace_push_return_trace(unsigned long ret, unsigned long func, unsigned long frame_pointer, unsigned long *retp) { unsigned long long calltime; @@ -177,8 +177,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, #ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR current->ret_stack[index].retp = retp; #endif - *depth = current->curr_ret_stack; - return 0; } @@ -188,14 +186,20 @@ int function_graph_enter(unsigned long ret, unsigned long func, struct ftrace_graph_ent trace; trace.func = func; - trace.depth = current->curr_ret_stack + 1; + trace.depth = ++current->curr_ret_depth; /* Only trace if the calling function expects to */ if (!ftrace_graph_entry(&trace)) - return -EBUSY; + goto out; - return ftrace_push_return_trace(ret, func, &trace.depth, - frame_pointer, retp); + if (ftrace_push_return_trace(ret, func, + frame_pointer, retp)) + goto out; + + return 0; + out: + current->curr_ret_depth--; + return -EBUSY; } /* Retrieve a function return address to the trace stack on thread info.*/ @@ -257,7 +261,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, trace->func = current->ret_stack[index].func; trace->calltime = current->ret_stack[index].calltime; trace->overrun = atomic_read(¤t->trace_overrun); - trace->depth = index; + trace->depth = current->curr_ret_depth; } /* @@ -273,6 +277,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) trace.rettime = trace_clock_local(); barrier(); current->curr_ret_stack--; + current->curr_ret_depth--; /* * The curr_ret_stack can be less than -1 only if it was * filtered out and it's about to return from the function. -- cgit From 552701dd0fa7c3d448142e87210590ba424694a0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 19 Nov 2018 15:18:40 -0500 Subject: function_graph: Move return callback before update of curr_ret_stack In the past, curr_ret_stack had two functions. One was to denote the depth of the call graph, the other is to keep track of where on the ret_stack the data is used. Although they may be slightly related, there are two cases where they need to be used differently. The one case is that it keeps the ret_stack data from being corrupted by an interrupt coming in and overwriting the data still in use. The other is just to know where the depth of the stack currently is. The function profiler uses the ret_stack to save a "subtime" variable that is part of the data on the ret_stack. If curr_ret_stack is modified too early, then this variable can be corrupted. The "max_depth" option, when set to 1, will record the first functions going into the kernel. To see all top functions (when dealing with timings), the depth variable needs to be lowered before calling the return hook. But by lowering the curr_ret_stack, it makes the data on the ret_stack still being used by the return hook susceptible to being overwritten. Now that there's two variables to handle both cases (curr_ret_depth), we can move them to the locations where they can handle both cases. Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_functions_graph.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 02d4081a7f5a..4f0d72ae6362 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -261,7 +261,13 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, trace->func = current->ret_stack[index].func; trace->calltime = current->ret_stack[index].calltime; trace->overrun = atomic_read(¤t->trace_overrun); - trace->depth = current->curr_ret_depth; + trace->depth = current->curr_ret_depth--; + /* + * We still want to trace interrupts coming in if + * max_depth is set to 1. Make sure the decrement is + * seen before ftrace_graph_return. + */ + barrier(); } /* @@ -275,9 +281,14 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) ftrace_pop_return_trace(&trace, &ret, frame_pointer); trace.rettime = trace_clock_local(); + ftrace_graph_return(&trace); + /* + * The ftrace_graph_return() may still access the current + * ret_stack structure, we need to make sure the update of + * curr_ret_stack is after that. + */ barrier(); current->curr_ret_stack--; - current->curr_ret_depth--; /* * The curr_ret_stack can be less than -1 only if it was * filtered out and it's about to return from the function. @@ -288,13 +299,6 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) return ret; } - /* - * The trace should run after decrementing the ret counter - * in case an interrupt were to come in. We don't want to - * lose the interrupt if max_depth is set. - */ - ftrace_graph_return(&trace); - if (unlikely(!ret)) { ftrace_graph_stop(); WARN_ON(1); -- cgit From 7c6ea35ef50810aa12ab26f21cb858d980881576 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 20 Nov 2018 12:40:25 -0500 Subject: function_graph: Reverse the order of pushing the ret_stack and the callback The function graph profiler uses the ret_stack to store the "subtime" and reuse it by nested functions and also on the return. But the current logic has the profiler callback called before the ret_stack is updated, and it is just modifying the ret_stack that will later be allocated (it's just lucky that the "subtime" is not touched when it is allocated). This could also cause a crash if we are at the end of the ret_stack when this happens. By reversing the order of the allocating the ret_stack and then calling the callbacks attached to a function being traced, the ret_stack entry is no longer used before it is allocated. Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_functions_graph.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 4f0d72ae6362..2561460d7baf 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -188,15 +188,17 @@ int function_graph_enter(unsigned long ret, unsigned long func, trace.func = func; trace.depth = ++current->curr_ret_depth; - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - goto out; - if (ftrace_push_return_trace(ret, func, frame_pointer, retp)) goto out; + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) + goto out_ret; + return 0; + out_ret: + current->curr_ret_stack--; out: current->curr_ret_depth--; return -EBUSY; -- cgit From b1b35f2e218a5b57d03bbc3b0667d5064570dc60 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 20 Nov 2018 12:51:07 -0500 Subject: function_graph: Have profiler use curr_ret_stack and not depth The profiler uses trace->depth to find its entry on the ret_stack, but the depth may not match the actual location of where its entry is (if an interrupt were to preempt the processing of the profiler for another function, the depth and the curr_ret_stack will be different). Have it use the curr_ret_stack as the index to find its ret_stack entry instead of using the depth variable, as that is no longer guaranteed to be the same. Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 48513954713c..77734451cb05 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -817,7 +817,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip, #ifdef CONFIG_FUNCTION_GRAPH_TRACER static int profile_graph_entry(struct ftrace_graph_ent *trace) { - int index = trace->depth; + int index = current->curr_ret_stack; function_profile_call(trace->func, 0, NULL, NULL); @@ -852,7 +852,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) if (!fgraph_graph_time) { int index; - index = trace->depth; + index = current->curr_ret_stack; /* Append this call time to the parent time to subtract */ if (index) -- cgit