From 4b512860bdbdddcf41467ebd394f27cb8dfb528c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 23 May 2023 23:09:13 -0400 Subject: tracing: Rename stacktrace field to common_stacktrace The histogram and synthetic events can use a pseudo event called "stacktrace" that will create a stacktrace at the time of the event and use it just like it was a normal field. We have other pseudo events such as "common_cpu" and "common_timestamp". To stay consistent with that, convert "stacktrace" to "common_stacktrace". As this was used in older kernels, to keep backward compatibility, this will act just like "common_cpu" did with "cpu". That is, "cpu" will be the same as "common_cpu" unless the event has a "cpu" field. In which case, the event's field is used. The same is true with "stacktrace". Also update the documentation to reflect this change. Link: https://lore.kernel.org/linux-trace-kernel/20230523230913.6860e28d@rorschach.local.home Cc: Masami Hiramatsu Cc: Tom Zanussi Cc: Mark Rutland Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ebc59781456a..81801dc31784 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5752,7 +5752,7 @@ static const char readme_msg[] = "\t table using the key(s) and value(s) named, and the value of a\n" "\t sum called 'hitcount' is incremented. Keys and values\n" "\t correspond to fields in the event's format description. Keys\n" - "\t can be any field, or the special string 'stacktrace'.\n" + "\t can be any field, or the special string 'common_stacktrace'.\n" "\t Compound keys consisting of up to two fields can be specified\n" "\t by the 'keys' keyword. Values must correspond to numeric\n" "\t fields. Sort keys consisting of up to two fields can be\n" -- cgit From 5bd4990f19b0fedbba5511b44c1cd1fdb7061f3c Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 22 May 2023 14:50:13 +0100 Subject: trace: Convert trace/seq to use copy_splice_read() For the splice from the trace seq buffer, just use copy_splice_read(). In the future, something better can probably be done by gifting pages from seq->buf into the pipe, but that would require changing seq->buf into a vmap over an array of pages. Signed-off-by: David Howells cc: Christoph Hellwig cc: Al Viro cc: Jens Axboe cc: Steven Rostedt cc: Masami Hiramatsu cc: linux-kernel@vger.kernel.org cc: linux-trace-kernel@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-block@vger.kernel.org cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/20230522135018.2742245-27-dhowells@redhat.com Signed-off-by: Jens Axboe --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ebc59781456a..c210d02fac97 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5171,7 +5171,7 @@ static const struct file_operations tracing_fops = { .open = tracing_open, .read = seq_read, .read_iter = seq_read_iter, - .splice_read = generic_file_splice_read, + .splice_read = copy_splice_read, .write = tracing_write_stub, .llseek = tracing_lseek, .release = tracing_release, -- cgit From c7dce4c5d9f6b17feec5ec6056453d019ee4d13b Mon Sep 17 00:00:00 2001 From: Azeem Shaikh Date: Tue, 16 May 2023 14:39:56 +0000 Subject: tracing: Replace all non-returning strlcpy with strscpy strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated [1]. In an effort to remove strlcpy() completely [2], replace strlcpy() here with strscpy(). No return values were used, so direct replacement with strlcpy is safe. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [2] https://github.com/KSPP/linux/issues/89 Signed-off-by: Azeem Shaikh Acked-by: Masami Hiramatsu (Google) Reviewed-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20230516143956.1367827-1-azeemshaikh38@gmail.com --- kernel/trace/trace.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ebc59781456a..28ccd0c9bdf0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -196,7 +196,7 @@ static int boot_snapshot_index; static int __init set_cmdline_ftrace(char *str) { - strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); + strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); default_bootup_tracer = bootup_tracer_buf; /* We are using ftrace early, expand it */ ring_buffer_expanded = true; @@ -281,7 +281,7 @@ static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; static int __init set_trace_boot_options(char *str) { - strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); + strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); return 1; } __setup("trace_options=", set_trace_boot_options); @@ -291,7 +291,7 @@ static char *trace_boot_clock __initdata; static int __init set_trace_boot_clock(char *str) { - strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); + strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); trace_boot_clock = trace_boot_clock_buf; return 1; } @@ -2521,7 +2521,7 @@ static void __trace_find_cmdline(int pid, char comm[]) if (map != NO_CMDLINE_MAP) { tpid = savedcmd->map_cmdline_to_pid[map]; if (tpid == pid) { - strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN); + strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN); return; } } -- cgit From e8352cf5778c53b80fdcee086278b2048ddb8f98 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Sun, 28 May 2023 01:17:38 -0400 Subject: tracing: Move setting of tracing_selftest_running out of register_tracer() The variables tracing_selftest_running and tracing_selftest_disabled are only used for when CONFIG_FTRACE_STARTUP_TEST is enabled. Make them only visible within the selftest code. The setting of those variables are in the register_tracer() call, and set in a location where they do not need to be. Create a wrapper around run_tracer_selftest() called do_run_tracer_selftest() which sets those variables, and have register_tracer() call that instead. Having those variables only set within the CONFIG_FTRACE_STARTUP_TEST scope gets rid of them (and also the ability to remove testing against them) when the startup tests are not enabled (most cases). Link: https://lkml.kernel.org/r/20230528051742.1325503-2-rostedt@goodmis.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 81801dc31784..87e5920b141f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2041,6 +2041,17 @@ static int run_tracer_selftest(struct tracer *type) return 0; } +static int do_run_tracer_selftest(struct tracer *type) +{ + int ret; + + tracing_selftest_running = true; + ret = run_tracer_selftest(type); + tracing_selftest_running = false; + + return ret; +} + static __init int init_trace_selftests(void) { struct trace_selftests *p, *n; @@ -2092,6 +2103,10 @@ static inline int run_tracer_selftest(struct tracer *type) { return 0; } +static inline int do_run_tracer_selftest(struct tracer *type) +{ + return 0; +} #endif /* CONFIG_FTRACE_STARTUP_TEST */ static void add_tracer_options(struct trace_array *tr, struct tracer *t); @@ -2127,8 +2142,6 @@ int __init register_tracer(struct tracer *type) mutex_lock(&trace_types_lock); - tracing_selftest_running = true; - for (t = trace_types; t; t = t->next) { if (strcmp(type->name, t->name) == 0) { /* already found */ @@ -2157,7 +2170,7 @@ int __init register_tracer(struct tracer *type) /* store the tracer for __set_tracer_option */ type->flags->trace = type; - ret = run_tracer_selftest(type); + ret = do_run_tracer_selftest(type); if (ret < 0) goto out; @@ -2166,7 +2179,6 @@ int __init register_tracer(struct tracer *type) add_tracer_options(&global_trace, type); out: - tracing_selftest_running = false; mutex_unlock(&trace_types_lock); if (ret || !default_bootup_tracer) -- cgit From 9da705d432a07927526005a0688d81fbbf30e349 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Sun, 28 May 2023 01:17:39 -0400 Subject: tracing: Have tracer selftests call cond_resched() before running As there are more and more internal selftests being added to the Linux kernel (KSAN, lockdep, etc) the selftests are taking longer to run when these are enabled. Add a cond_resched() to the calling of do_run_tracer_selftest() to force a schedule if NEED_RESCHED is set, otherwise the soft lockup watchdog may trigger on boot up. Link: https://lkml.kernel.org/r/20230528051742.1325503-3-rostedt@goodmis.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 87e5920b141f..70f2b511b9cd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2045,6 +2045,13 @@ static int do_run_tracer_selftest(struct tracer *type) { int ret; + /* + * Tests can take a long time, especially if they are run one after the + * other, as does happen during bootup when all the tracers are + * registered. This could cause the soft lockup watchdog to trigger. + */ + cond_resched(); + tracing_selftest_running = true; ret = run_tracer_selftest(type); tracing_selftest_running = false; -- cgit From a3ae76d7ff781208100e6acc58eb09afb7b4b177 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Sun, 28 May 2023 01:17:40 -0400 Subject: tracing: Make tracing_selftest_running/delete nops when not used There's no reason to test the condition variables tracing_selftest_running or tracing_selftest_delete when tracing selftests are not enabled. Make them define 0s when not the selftests are not configured in. Link: https://lkml.kernel.org/r/20230528051742.1325503-4-rostedt@goodmis.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 70f2b511b9cd..004f5f99e943 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -60,6 +60,7 @@ */ bool ring_buffer_expanded; +#ifdef CONFIG_FTRACE_STARTUP_TEST /* * We need to change this state when a selftest is running. * A selftest will lurk into the ring-buffer to count the @@ -75,7 +76,6 @@ static bool __read_mostly tracing_selftest_running; */ bool __read_mostly tracing_selftest_disabled; -#ifdef CONFIG_FTRACE_STARTUP_TEST void __init disable_tracing_selftest(const char *reason) { if (!tracing_selftest_disabled) { @@ -83,6 +83,9 @@ void __init disable_tracing_selftest(const char *reason) pr_info("Ftrace startup test is disabled due to %s\n", reason); } } +#else +#define tracing_selftest_running 0 +#define tracing_selftest_disabled 0 #endif /* Pipe tracepoints to printk */ -- cgit From ac9d2cb1d5f8e22235c399338504dadc87d14e74 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Sun, 28 May 2023 01:17:41 -0400 Subject: tracing: Only make selftest conditionals affect the global_trace The tracing_selftest_running and tracing_selftest_disabled variables were to keep trace_printk() and other writes from affecting the tracing selftests, as the tracing selftests would examine the ring buffer to see if it contained what it expected or not. trace_printk() and friends could add to the ring buffer and cause the selftests to fail (and then disable the tracer that was being tested). To keep that from happening, these variables were added and would keep trace_printk() and friends from writing to the ring buffer while the tests were going on. But this was only the top level ring buffer (owned by the global_trace instance). There is no reason to prevent writing into ring buffers of other instances via the trace_array_printk() and friends. For the functions that could be used by other instances, check if the global_trace is the tracer instance that is being written to before deciding to not allow the write. Link: https://lkml.kernel.org/r/20230528051742.1325503-5-rostedt@goodmis.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 004f5f99e943..64a4dde073ef 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1054,7 +1054,10 @@ int __trace_array_puts(struct trace_array *tr, unsigned long ip, if (!(tr->trace_flags & TRACE_ITER_PRINTK)) return 0; - if (unlikely(tracing_selftest_running || tracing_disabled)) + if (unlikely(tracing_selftest_running && tr == &global_trace)) + return 0; + + if (unlikely(tracing_disabled)) return 0; alloc = sizeof(*entry) + size + 2; /* possible \n added */ @@ -3512,7 +3515,7 @@ __trace_array_vprintk(struct trace_buffer *buffer, unsigned int trace_ctx; char *tbuffer; - if (tracing_disabled || tracing_selftest_running) + if (tracing_disabled) return 0; /* Don't pollute graph traces with trace_vprintk internals */ @@ -3560,6 +3563,9 @@ __printf(3, 0) int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { + if (tracing_selftest_running && tr == &global_trace) + return 0; + return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); } -- cgit From 334e5519c3757019cc591d4539d5aca199bdb114 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 6 Jun 2023 21:39:55 +0900 Subject: tracing/probes: Add fprobe events for tracing function entry and exit. Add fprobe events for tracing function entry and exit instead of kprobe events. With this change, we can continue to trace function entry/exit even if the CONFIG_KPROBES_ON_FTRACE is not available. Since CONFIG_KPROBES_ON_FTRACE requires the CONFIG_DYNAMIC_FTRACE_WITH_REGS, it is not available if the architecture only supports CONFIG_DYNAMIC_FTRACE_WITH_ARGS. And that means kprobe events can not probe function entry/exit effectively on such architecture. But this can be solved if the dynamic events supports fprobe events. The fprobe event is a new dynamic events which is only for the function (symbol) entry and exit. This event accepts non register fetch arguments so that user can trace the function arguments and return values. The fprobe events syntax is here; f[:[GRP/][EVENT]] FUNCTION [FETCHARGS] f[MAXACTIVE][:[GRP/][EVENT]] FUNCTION%return [FETCHARGS] E.g. # echo 'f vfs_read $arg1' >> dynamic_events # echo 'f vfs_read%return $retval' >> dynamic_events # cat dynamic_events f:fprobes/vfs_read__entry vfs_read arg1=$arg1 f:fprobes/vfs_read__exit vfs_read%return arg1=$retval # echo 1 > events/fprobes/enable # head -n 20 trace | tail # TASK-PID CPU# ||||| TIMESTAMP FUNCTION # | | | ||||| | | sh-142 [005] ...1. 448.386420: vfs_read__entry: (vfs_read+0x4/0x340) arg1=0xffff888007f7c540 sh-142 [005] ..... 448.386436: vfs_read__exit: (ksys_read+0x75/0x100 <- vfs_read) arg1=0x1 sh-142 [005] ...1. 448.386451: vfs_read__entry: (vfs_read+0x4/0x340) arg1=0xffff888007f7c540 sh-142 [005] ..... 448.386458: vfs_read__exit: (ksys_read+0x75/0x100 <- vfs_read) arg1=0x1 sh-142 [005] ...1. 448.386469: vfs_read__entry: (vfs_read+0x4/0x340) arg1=0xffff888007f7c540 sh-142 [005] ..... 448.386476: vfs_read__exit: (ksys_read+0x75/0x100 <- vfs_read) arg1=0x1 sh-142 [005] ...1. 448.602073: vfs_read__entry: (vfs_read+0x4/0x340) arg1=0xffff888007f7c540 sh-142 [005] ..... 448.602089: vfs_read__exit: (ksys_read+0x75/0x100 <- vfs_read) arg1=0x1 Link: https://lore.kernel.org/all/168507469754.913472.6112857614708350210.stgit@mhiramat.roam.corp.google.com/ Reported-by: kernel test robot Link: https://lore.kernel.org/all/202302011530.7vm4O8Ro-lkp@intel.com/ Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 64a4dde073ef..755b0bf2e1ac 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5672,10 +5672,16 @@ static const char readme_msg[] = " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif -#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) +#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \ + defined(CONFIG_FPROBE_EVENTS) "\t accepts: event-definitions (one definition per line)\n" +#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) "\t Format: p[:[/][]] []\n" "\t r[maxactive][:[/][]] []\n" +#endif +#ifdef CONFIG_FPROBE_EVENTS + "\t f[:[/][]] [%return] []\n" +#endif #ifdef CONFIG_HIST_TRIGGERS "\t s:[synthetic/] []\n" #endif -- cgit From e2d0d7b2f42dcaf924e9c891c91c9aa22cbbebce Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 6 Jun 2023 21:39:55 +0900 Subject: tracing/probes: Add tracepoint support on fprobe_events Allow fprobe_events to trace raw tracepoints so that user can trace tracepoints which don't have traceevent wrappers. This new event is always available if the fprobe_events is enabled (thus no kconfig), because the fprobe_events depends on the trace-event and traceporint. e.g. # echo 't sched_overutilized_tp' >> dynamic_events # echo 't 9p_client_req' >> dynamic_events # cat dynamic_events t:tracepoints/sched_overutilized_tp sched_overutilized_tp t:tracepoints/_9p_client_req 9p_client_req The event name is based on the tracepoint name, but if it is started with digit character, an underscore '_' will be added. NOTE: to avoid further confusion, this renames TPARG_FL_TPOINT to TPARG_FL_TEVENT because this flag is used for eprobe (trace-event probe). And reuse TPARG_FL_TPOINT for this raw tracepoint probe. Link: https://lore.kernel.org/all/168507471874.913472.17214624519622959593.stgit@mhiramat.roam.corp.google.com/ Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202305020453.afTJ3VVp-lkp@intel.com/ Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 755b0bf2e1ac..fa4e1a18da70 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5681,6 +5681,7 @@ static const char readme_msg[] = #endif #ifdef CONFIG_FPROBE_EVENTS "\t f[:[/][]] [%return] []\n" + "\t t[:[/][]] []\n" #endif #ifdef CONFIG_HIST_TRIGGERS "\t s:[synthetic/] []\n" -- cgit From b576e09701c7d045bbe5cd85d53e2f34426aa214 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 6 Jun 2023 21:39:56 +0900 Subject: tracing/probes: Support function parameters if BTF is available Support function or tracepoint parameters by name if BTF support is enabled and the event is for function entry (this feature can be used with kprobe- events, fprobe-events and tracepoint probe events.) Note that the BTF variable syntax does not require a prefix. If it starts with an alphabetic character or an underscore ('_') without a prefix like '$' and '%', it is considered as a BTF variable. If you specify only the BTF variable name, the argument name will also be the same name instead of 'arg*'. # echo 'p vfs_read count pos' >> dynamic_events # echo 'f vfs_write count pos' >> dynamic_events # echo 't sched_overutilized_tp rd overutilized' >> dynamic_events # cat dynamic_events p:kprobes/p_vfs_read_0 vfs_read count=count pos=pos f:fprobes/vfs_write__entry vfs_write count=count pos=pos t:tracepoints/sched_overutilized_tp sched_overutilized_tp rd=rd overutilized=overutilized Link: https://lore.kernel.org/all/168507474014.913472.16963996883278039183.stgit@mhiramat.roam.corp.google.com/ Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Alan Maguire Tested-by: Alan Maguire --- kernel/trace/trace.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fa4e1a18da70..a70b22235eaf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5698,7 +5698,11 @@ static const char readme_msg[] = "\t args: =fetcharg[:type]\n" "\t fetcharg: (%|$), @
, @[+|-],\n" #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API +#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS + "\t $stack, $stack, $retval, $comm, $arg, \n" +#else "\t $stack, $stack, $retval, $comm, $arg,\n" +#endif #else "\t $stack, $stack, $retval, $comm,\n" #endif -- cgit From 02b0095e2fbbc060560c1065f86a211d91e27b26 Mon Sep 17 00:00:00 2001 From: Mateusz Stachyra Date: Tue, 4 Jul 2023 12:27:06 +0200 Subject: tracing: Fix null pointer dereference in tracing_err_log_open() Fix an issue in function 'tracing_err_log_open'. The function doesn't call 'seq_open' if the file is opened only with write permissions, which results in 'file->private_data' being left as null. If we then use 'lseek' on that opened file, 'seq_lseek' dereferences 'file->private_data' in 'mutex_lock(&m->lock)', resulting in a kernel panic. Writing to this node requires root privileges, therefore this bug has very little security impact. Tracefs node: /sys/kernel/tracing/error_log Example Kernel panic: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000038 Call trace: mutex_lock+0x30/0x110 seq_lseek+0x34/0xb8 __arm64_sys_lseek+0x6c/0xb8 invoke_syscall+0x58/0x13c el0_svc_common+0xc4/0x10c do_el0_svc+0x24/0x98 el0_svc+0x24/0x88 el0t_64_sync_handler+0x84/0xe4 el0t_64_sync+0x1b4/0x1b8 Code: d503201f aa0803e0 aa1f03e1 aa0103e9 (c8e97d02) ---[ end trace 561d1b49c12cf8a5 ]--- Kernel panic - not syncing: Oops: Fatal exception Link: https://lore.kernel.org/linux-trace-kernel/20230703155237eucms1p4dfb6a19caa14c79eb6c823d127b39024@eucms1p4 Link: https://lore.kernel.org/linux-trace-kernel/20230704102706eucms1p30d7ecdcc287f46ad67679fc8491b2e0f@eucms1p3 Cc: stable@vger.kernel.org Fixes: 8a062902be725 ("tracing: Add tracing error log") Signed-off-by: Mateusz Stachyra Suggested-by: Steven Rostedt Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 64a4dde073ef..3d34e6fea6b2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8135,7 +8135,7 @@ static const struct file_operations tracing_err_log_fops = { .open = tracing_err_log_open, .write = tracing_err_log_write, .read = seq_read, - .llseek = seq_lseek, + .llseek = tracing_lseek, .release = tracing_err_log_release, }; -- cgit From bec3c25c247c4f88a33d79675a09e1644c9a3114 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 12 Jul 2023 10:52:35 -0400 Subject: tracing: Stop FORTIFY_SOURCE complaining about stack trace caller The stack_trace event is an event created by the tracing subsystem to store stack traces. It originally just contained a hard coded array of 8 words to hold the stack, and a "size" to know how many entries are there. This is exported to user space as: name: kernel_stack ID: 4 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:int size; offset:8; size:4; signed:1; field:unsigned long caller[8]; offset:16; size:64; signed:0; print fmt: "\t=> %ps\n\t=> %ps\n\t=> %ps\n" "\t=> %ps\n\t=> %ps\n\t=> %ps\n" "\t=> %ps\n\t=> %ps\n",i (void *)REC->caller[0], (void *)REC->caller[1], (void *)REC->caller[2], (void *)REC->caller[3], (void *)REC->caller[4], (void *)REC->caller[5], (void *)REC->caller[6], (void *)REC->caller[7] Where the user space tracers could parse the stack. The library was updated for this specific event to only look at the size, and not the array. But some older users still look at the array (note, the older code still checks to make sure the array fits inside the event that it read. That is, if only 4 words were saved, the parser would not read the fifth word because it will see that it was outside of the event size). This event was changed a while ago to be more dynamic, and would save a full stack even if it was greater than 8 words. It does this by simply allocating more ring buffer to hold the extra words. Then it copies in the stack via: memcpy(&entry->caller, fstack->calls, size); As the entry is struct stack_entry, that is created by a macro to both create the structure and export this to user space, it still had the caller field of entry defined as: unsigned long caller[8]. When the stack is greater than 8, the FORTIFY_SOURCE code notices that the amount being copied is greater than the source array and complains about it. It has no idea that the source is pointing to the ring buffer with the required allocation. To hide this from the FORTIFY_SOURCE logic, pointer arithmetic is used: ptr = ring_buffer_event_data(event); entry = ptr; ptr += offsetof(typeof(*entry), caller); memcpy(ptr, fstack->calls, size); Link: https://lore.kernel.org/all/20230612160748.4082850-1-svens@linux.ibm.com/ Link: https://lore.kernel.org/linux-trace-kernel/20230712105235.5fc441aa@gandalf.local.home Cc: Masami Hiramatsu Cc: Mark Rutland Reported-by: Sven Schnelle Tested-by: Sven Schnelle Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4529e264cb86..20122eeccf97 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3118,6 +3118,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, struct ftrace_stack *fstack; struct stack_entry *entry; int stackidx; + void *ptr; /* * Add one, for this function and the call to save_stack_trace() @@ -3161,9 +3162,25 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, trace_ctx); if (!event) goto out; - entry = ring_buffer_event_data(event); + ptr = ring_buffer_event_data(event); + entry = ptr; + + /* + * For backward compatibility reasons, the entry->caller is an + * array of 8 slots to store the stack. This is also exported + * to user space. The amount allocated on the ring buffer actually + * holds enough for the stack specified by nr_entries. This will + * go into the location of entry->caller. Due to string fortifiers + * checking the size of the destination of memcpy() it triggers + * when it detects that size is greater than 8. To hide this from + * the fortifiers, we use "ptr" and pointer arithmetic to assign caller. + * + * The below is really just: + * memcpy(&entry->caller, fstack->calls, size); + */ + ptr += offsetof(typeof(*entry), caller); + memcpy(ptr, fstack->calls, size); - memcpy(&entry->caller, fstack->calls, size); entry->size = nr_entries; if (!call_filter_check_discard(call, entry, buffer, event)) -- cgit From d5a821896360cc8b93a15bd888fabc858c038dc0 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Thu, 13 Jul 2023 22:14:35 +0800 Subject: tracing: Fix memory leak of iter->temp when reading trace_pipe kmemleak reports: unreferenced object 0xffff88814d14e200 (size 256): comm "cat", pid 336, jiffies 4294871818 (age 779.490s) hex dump (first 32 bytes): 04 00 01 03 00 00 00 00 08 00 00 00 00 00 00 00 ................ 0c d8 c8 9b ff ff ff ff 04 5a ca 9b ff ff ff ff .........Z...... backtrace: [] __kmalloc+0x4f/0x140 [] trace_find_next_entry+0xbb/0x1d0 [] trace_print_lat_context+0xaf/0x4e0 [] print_trace_line+0x3e0/0x950 [] tracing_read_pipe+0x2d9/0x5a0 [] vfs_read+0x143/0x520 [] ksys_read+0xbd/0x160 [] do_syscall_64+0x3f/0x90 [] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 when reading file 'trace_pipe', 'iter->temp' is allocated or relocated in trace_find_next_entry() but not freed before 'trace_pipe' is closed. To fix it, free 'iter->temp' in tracing_release_pipe(). Link: https://lore.kernel.org/linux-trace-kernel/20230713141435.1133021-1-zhengyejian1@huawei.com Cc: stable@vger.kernel.org Fixes: ff895103a84ab ("tracing: Save off entry when peeking at next entry") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 20122eeccf97..be847d45d81c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6781,6 +6781,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) free_cpumask_var(iter->started); kfree(iter->fmt); + kfree(iter->temp); mutex_destroy(&iter->mutex); kfree(iter); -- cgit