diff options
author | Steven Rostedt <rostedt@goodmis.org> | 2025-07-29 14:23:09 -0400 |
---|---|---|
committer | Steven Rostedt (Google) <rostedt@goodmis.org> | 2025-07-31 10:20:10 -0400 |
commit | 055c7060e7ca71bb86da616158fc74254730ae2a (patch) | |
tree | 6712903f5c31fbb0abb3905816af9325a65f61bf /kernel/unwind/deferred.c | |
parent | 2dffa355f6c279e7d2e574abf9446c41a631c9e5 (diff) |
unwind_user/deferred: Make unwind deferral requests NMI-safe
Make unwind_deferred_request() NMI-safe so tracers in NMI context can
call it and safely request a user space stack trace to be taken when the
task returns to user space.
Note, this is only allowed for architectures that implement an NMI-safe
cmpxchg. If an architecture that does not support an NMI-safe cmpxchg
requests a deferred stack trace from NMI context, it will get -EINVAL
and trigger a warning. Such architectures would need another method
(perhaps an irq_work) to request a deferred user space stack trace.
That can be dealt with later if one of these architectures requires this
feature.
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Indu Bhagat <indu.bhagat@oracle.com>
Cc: "Jose E. Marchesi" <jemarch@gnu.org>
Cc: Beau Belgrave <beaub@linux.microsoft.com>
Cc: Jens Remus <jremus@linux.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Sam James <sam@gentoo.org>
Link: https://lore.kernel.org/20250729182405.657072238@kernel.org
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Diffstat (limited to 'kernel/unwind/deferred.c')
-rw-r--r-- | kernel/unwind/deferred.c | 52 |
1 files changed, 44 insertions, 8 deletions
diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c index 2cbae2ada309..c5ac087d2396 100644 --- a/kernel/unwind/deferred.c +++ b/kernel/unwind/deferred.c @@ -12,6 +12,31 @@ #include <linux/slab.h> #include <linux/mm.h> +/* + * For requesting a deferred user space stack trace from NMI context + * the architecture must support a safe cmpxchg in NMI context. + * For those architectures that do not have that, then it cannot ask + * for a deferred user space stack trace from an NMI context. If it + * does, then it will get -EINVAL. + */ +#if defined(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) +# define CAN_USE_IN_NMI 1 +static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt) +{ + u32 old = 0; + + return try_cmpxchg(&info->id.cnt, &old, cnt); +} +#else +# define CAN_USE_IN_NMI 0 +/* When NMIs are not allowed, this always succeeds */ +static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt) +{ + info->id.cnt = cnt; + return true; +} +#endif + /* Make the cache fit in a 4K page */ #define UNWIND_MAX_ENTRIES \ ((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long)) @@ -42,14 +67,13 @@ static DEFINE_PER_CPU(u32, unwind_ctx_ctr); static u64 get_cookie(struct unwind_task_info *info) { u32 cnt = 1; - u32 old = 0; if (info->id.cpu) return info->id.id; /* LSB is always set to ensure 0 is an invalid value */ cnt |= __this_cpu_read(unwind_ctx_ctr) + 2; - if (try_cmpxchg(&info->id.cnt, &old, cnt)) { + if (try_assign_cnt(info, cnt)) { /* Update the per cpu counter */ __this_cpu_write(unwind_ctx_ctr, cnt); } @@ -167,31 +191,43 @@ static void unwind_deferred_task_work(struct callback_head *head) int unwind_deferred_request(struct unwind_work *work, u64 *cookie) { struct unwind_task_info *info = &current->unwind_info; + long pending; int ret; *cookie = 0; - if (WARN_ON_ONCE(in_nmi())) - return -EINVAL; - if ((current->flags & (PF_KTHREAD | PF_EXITING)) || !user_mode(task_pt_regs(current))) return -EINVAL; + /* + * NMI requires having safe cmpxchg 
operations. + * Trigger a warning to make it obvious that an architecture + * is using this in NMI when it should not be. + */ + if (WARN_ON_ONCE(!CAN_USE_IN_NMI && in_nmi())) + return -EINVAL; + guard(irqsave)(); *cookie = get_cookie(info); /* callback already pending? */ - if (info->pending) + pending = READ_ONCE(info->pending); + if (pending) + return 1; + + /* Claim the work unless an NMI just now swooped in to do so. */ + if (!try_cmpxchg(&info->pending, &pending, 1)) return 1; /* The work has been claimed, now schedule it. */ ret = task_work_add(current, &info->work, TWA_RESUME); - if (WARN_ON_ONCE(ret)) + if (WARN_ON_ONCE(ret)) { + WRITE_ONCE(info->pending, 0); return ret; + } - info->pending = 1; return 0; } |