 include/linux/unwind_deferred.h       |  24
 include/linux/unwind_deferred_types.h |  24
 kernel/unwind/deferred.c              | 156
 3 files changed, 203 insertions(+), 1 deletion(-)
diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h
index baacf4a1eb4c..14efd8c027aa 100644
--- a/include/linux/unwind_deferred.h
+++ b/include/linux/unwind_deferred.h
@@ -2,9 +2,19 @@
 #ifndef _LINUX_UNWIND_USER_DEFERRED_H
 #define _LINUX_UNWIND_USER_DEFERRED_H
 
+#include <linux/task_work.h>
 #include <linux/unwind_user.h>
 #include <linux/unwind_deferred_types.h>
 
+struct unwind_work;
+
+typedef void (*unwind_callback_t)(struct unwind_work *work, struct unwind_stacktrace *trace, u64 cookie);
+
+struct unwind_work {
+	struct list_head	list;
+	unwind_callback_t	func;
+};
+
 #ifdef CONFIG_UNWIND_USER
 
 void unwind_task_init(struct task_struct *task);
@@ -12,8 +22,19 @@ void unwind_task_free(struct task_struct *task);
 
 int unwind_user_faultable(struct unwind_stacktrace *trace);
 
+int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func);
+int unwind_deferred_request(struct unwind_work *work, u64 *cookie);
+void unwind_deferred_cancel(struct unwind_work *work);
+
 static __always_inline void unwind_reset_info(void)
 {
+	if (unlikely(current->unwind_info.id.id))
+		current->unwind_info.id.id = 0;
+	/*
+	 * As unwind_user_faultable() can be called directly and
+	 * depends on nr_entries being cleared on exit to user,
+	 * this needs to be a separate conditional.
+	 */
 	if (unlikely(current->unwind_info.cache))
 		current->unwind_info.cache->nr_entries = 0;
 }
@@ -24,6 +45,9 @@ static inline void unwind_task_init(struct task_struct *task) {}
 static inline void unwind_task_free(struct task_struct *task) {}
 
 static inline int unwind_user_faultable(struct unwind_stacktrace *trace) { return -ENOSYS; }
+static inline int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) { return -ENOSYS; }
+static inline int unwind_deferred_request(struct unwind_work *work, u64 *timestamp) { return -ENOSYS; }
+static inline void unwind_deferred_cancel(struct unwind_work *work) {}
 
 static inline void unwind_reset_info(void) {}
 
diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h
index db5b54b18828..104c477d5609 100644
--- a/include/linux/unwind_deferred_types.h
+++ b/include/linux/unwind_deferred_types.h
@@ -7,8 +7,32 @@ struct unwind_cache {
 	unsigned long		entries[];
 };
 
+/*
+ * The unwind_task_id is a unique identifier that maps to a user space
+ * stacktrace. It is generated the first time a deferred user space
+ * stacktrace is requested after a task has entered the kernel and
+ * is cleared to zero when it exits. The mapped id will be a non-zero
+ * number.
+ *
+ * To simplify the generation of the 64 bit number, 32 bits will be
+ * the CPU it was generated on, and the other 32 bits will be a per
+ * cpu counter that gets incremented by two every time a new identifier
+ * is generated. The LSB will always be set to keep the value
+ * from being zero.
+ */
+union unwind_task_id {
+	struct {
+		u32	cpu;
+		u32	cnt;
+	};
+	u64	id;
+};
+
 struct unwind_task_info {
 	struct unwind_cache	*cache;
+	struct callback_head	work;
+	union unwind_task_id	id;
+	int			pending;
 };
 
 #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */
diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
index 96368a5aa522..2cbae2ada309 100644
--- a/kernel/unwind/deferred.c
+++ b/kernel/unwind/deferred.c
@@ -2,16 +2,63 @@
 /*
  * Deferred user space unwinding
  */
+#include <linux/sched/task_stack.h>
+#include <linux/unwind_deferred.h>
+#include <linux/sched/clock.h>
+#include <linux/task_work.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
-#include <linux/unwind_deferred.h>
+#include <linux/mm.h>
 
 /* Make the cache fit in a 4K page */
 #define UNWIND_MAX_ENTRIES				\
 	((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long))
 
+/* Guards adding to and reading the list of callbacks */
+static DEFINE_MUTEX(callback_mutex);
+static LIST_HEAD(callbacks);
+
+/*
+ * This is a unique percpu identifier for a given task entry context.
+ * Conceptually, it's incremented every time the CPU enters the kernel from
+ * user space, so that each "entry context" on the CPU gets a unique ID. In
+ * reality, as an optimization, it's only incremented on demand for the first
+ * deferred unwind request after a given entry-from-user.
+ *
+ * It's combined with the CPU id to make a systemwide-unique "context cookie".
+ */
+static DEFINE_PER_CPU(u32, unwind_ctx_ctr);
+
+/*
+ * The context cookie is a unique identifier that is assigned to a user
+ * space stacktrace. As the user space stacktrace remains the same while
+ * the task is in the kernel, the cookie is an identifier for the stacktrace.
+ * It is possible, however, for the stacktrace to get another cookie if
+ * another request is made after the cookie was cleared and before
+ * reentering user space.
+ */
+static u64 get_cookie(struct unwind_task_info *info)
+{
+	u32 cnt = 1;
+	u32 old = 0;
+
+	if (info->id.cpu)
+		return info->id.id;
+
+	/* LSB is always set to ensure 0 is an invalid value */
+	cnt |= __this_cpu_read(unwind_ctx_ctr) + 2;
+	if (try_cmpxchg(&info->id.cnt, &old, cnt)) {
+		/* Update the per cpu counter */
+		__this_cpu_write(unwind_ctx_ctr, cnt);
+	}
+	/* Interrupts are disabled, the CPU will always be the same */
+	info->id.cpu = smp_processor_id() + 1; /* Must be non zero */
+
+	return info->id.id;
+}
+
 /**
  * unwind_user_faultable - Produce a user stacktrace in faultable context
  * @trace: The descriptor that will store the user stacktrace
@@ -62,11 +109,117 @@ int unwind_user_faultable(struct unwind_stacktrace *trace)
 	return 0;
 }
 
+static void unwind_deferred_task_work(struct callback_head *head)
+{
+	struct unwind_task_info *info = container_of(head, struct unwind_task_info, work);
+	struct unwind_stacktrace trace;
+	struct unwind_work *work;
+	u64 cookie;
+
+	if (WARN_ON_ONCE(!info->pending))
+		return;
+
+	/* Allow work to come in again */
+	WRITE_ONCE(info->pending, 0);
+
+	/*
+	 * From here on out, the callback must always be called, even if it's
+	 * just an empty trace.
+	 */
+	trace.nr = 0;
+	trace.entries = NULL;
+
+	unwind_user_faultable(&trace);
+
+	cookie = info->id.id;
+
+	guard(mutex)(&callback_mutex);
+	list_for_each_entry(work, &callbacks, list) {
+		work->func(work, &trace, cookie);
+	}
+}
+
+/**
+ * unwind_deferred_request - Request a user stacktrace on task kernel exit
+ * @work: Unwind descriptor requesting the trace
+ * @cookie: The cookie of the first request made for this task
+ *
+ * Schedule a user space unwind to be done in task work before exiting the
+ * kernel.
+ *
+ * The returned @cookie output is the generated cookie of the very first
+ * request for a user space stacktrace for this task since it entered the
+ * kernel. It can be from a request by any caller of this infrastructure.
+ * Its value will also be passed to the callback function. It can be
+ * used to stitch kernel and user stack traces together in post-processing.
+ *
+ * It's valid to call this function multiple times for the same @work within
+ * the same task entry context. Each call will return the same cookie
+ * while the task hasn't left the kernel. If the callback is not pending
+ * because it has already been previously called for the same entry context,
+ * it will be called again with the same stack trace and cookie.
+ *
+ * Return: 1 if the callback was already queued.
+ *         0 if the callback successfully was queued.
+ *         Negative if there's an error.
+ *         @cookie holds the cookie of the first request by any user
+ */
+int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
+{
+	struct unwind_task_info *info = &current->unwind_info;
+	int ret;
+
+	*cookie = 0;
+
+	if (WARN_ON_ONCE(in_nmi()))
+		return -EINVAL;
+
+	if ((current->flags & (PF_KTHREAD | PF_EXITING)) ||
+	    !user_mode(task_pt_regs(current)))
+		return -EINVAL;
+
+	guard(irqsave)();
+
+	*cookie = get_cookie(info);
+
+	/* callback already pending? */
+	if (info->pending)
+		return 1;
+
+	/* The work has been claimed, now schedule it. */
+	ret = task_work_add(current, &info->work, TWA_RESUME);
+	if (WARN_ON_ONCE(ret))
+		return ret;
+
+	info->pending = 1;
+	return 0;
+}
+
+void unwind_deferred_cancel(struct unwind_work *work)
+{
+	if (!work)
+		return;
+
+	guard(mutex)(&callback_mutex);
+	list_del(&work->list);
+}
+
+int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func)
+{
+	memset(work, 0, sizeof(*work));
+
+	guard(mutex)(&callback_mutex);
+	list_add(&work->list, &callbacks);
+	work->func = func;
+	return 0;
+}
+
 void unwind_task_init(struct task_struct *task)
 {
 	struct unwind_task_info *info = &task->unwind_info;
 
 	memset(info, 0, sizeof(*info));
+	init_task_work(&info->work, unwind_deferred_task_work);
 }
 
 void unwind_task_free(struct task_struct *task)
@@ -74,4 +227,5 @@ void unwind_task_free(struct task_struct *task)
 	struct unwind_task_info *info = &task->unwind_info;
 
 	kfree(info->cache);
+	task_work_cancel(task, &info->work);
 }
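
For orientation, here is a minimal sketch of how a tracer might consume the interface added above. Only unwind_deferred_init(), unwind_deferred_request(), unwind_deferred_cancel(), unwind_callback_t, struct unwind_work and the trace->nr/trace->entries fields come from the patch; the my_tracer_* names and the pr_debug() reporting are hypothetical stand-ins for whatever a real caller would do with the data.

/* Hypothetical consumer of the deferred unwind interface added above. */
#include <linux/unwind_deferred.h>
#include <linux/printk.h>
#include <linux/init.h>

static struct unwind_work my_tracer_unwind_work;

/*
 * Runs from task_work, in faultable task context, just before the task
 * returns to user space. The same cookie that unwind_deferred_request()
 * handed out earlier is passed back here, so the user stacktrace can be
 * stitched to the kernel-side event in post-processing.
 */
static void my_tracer_unwind_callback(struct unwind_work *work,
				      struct unwind_stacktrace *trace,
				      u64 cookie)
{
	unsigned int i;

	pr_debug("user stack for cookie %llx (%u entries)\n", cookie, trace->nr);
	for (i = 0; i < trace->nr; i++)
		pr_debug("  %lx\n", trace->entries[i]);
}

/*
 * Somewhere in an event hook (any context except NMI): record a kernel
 * event now and ask for the matching user stacktrace to be generated on
 * the way back to user space.
 */
static void my_tracer_event_hook(void)
{
	u64 cookie;
	int ret;

	ret = unwind_deferred_request(&my_tracer_unwind_work, &cookie);
	if (ret < 0)
		return;	/* kthread, exiting task, or not entered from user mode */

	/*
	 * ret == 0: callback queued; ret == 1: it was already pending.
	 * Either way the kernel event can be tagged with the cookie now.
	 */
	pr_debug("kernel event tagged with cookie %llx\n", cookie);
}

static int __init my_tracer_init(void)
{
	return unwind_deferred_init(&my_tracer_unwind_work,
				    my_tracer_unwind_callback);
}

static void my_tracer_teardown(void)
{
	unwind_deferred_cancel(&my_tracer_unwind_work);
}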
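
The cookie is meant to be opaque to callers, but a post-processing tool can split it back into the halves described in the unwind_task_id comment. A rough user-space sketch, assuming a little-endian kernel so that the first union member (cpu) lands in the low 32 bits of the 64-bit id; on big-endian the halves are swapped, so treat this as illustrative only:

#include <stdint.h>
#include <stdio.h>

/*
 * Assumes little-endian layout of union unwind_task_id: 'cpu' in the low
 * 32 bits of 'id', 'cnt' in the high 32 bits.
 */
static void decode_cookie(uint64_t cookie)
{
	uint32_t cpu = (uint32_t)cookie;		/* stored as smp_processor_id() + 1 */
	uint32_t cnt = (uint32_t)(cookie >> 32);	/* odd value, LSB always set */

	if (!cookie) {
		printf("no deferred unwind was requested in this entry context\n");
		return;
	}
	printf("requested on CPU %u, per-cpu context counter %u\n", cpu - 1, cnt);
}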
