diff options
Diffstat (limited to 'include/linux/entry-common.h')
| -rw-r--r-- | include/linux/entry-common.h | 199 |
1 files changed, 199 insertions, 0 deletions
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h new file mode 100644 index 000000000000..87efb38b7081 --- /dev/null +++ b/include/linux/entry-common.h @@ -0,0 +1,199 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_ENTRYCOMMON_H +#define __LINUX_ENTRYCOMMON_H + +#include <linux/irq-entry-common.h> +#include <linux/livepatch.h> +#include <linux/ptrace.h> +#include <linux/resume_user_mode.h> +#include <linux/seccomp.h> +#include <linux/sched.h> + +#include <asm/entry-common.h> +#include <asm/syscall.h> + +#ifndef _TIF_UPROBE +# define _TIF_UPROBE (0) +#endif + +/* + * SYSCALL_WORK flags handled in syscall_enter_from_user_mode() + */ +#ifndef ARCH_SYSCALL_WORK_ENTER +# define ARCH_SYSCALL_WORK_ENTER (0) +#endif + +/* + * SYSCALL_WORK flags handled in syscall_exit_to_user_mode() + */ +#ifndef ARCH_SYSCALL_WORK_EXIT +# define ARCH_SYSCALL_WORK_EXIT (0) +#endif + +#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ + SYSCALL_WORK_SYSCALL_TRACEPOINT | \ + SYSCALL_WORK_SYSCALL_TRACE | \ + SYSCALL_WORK_SYSCALL_EMU | \ + SYSCALL_WORK_SYSCALL_AUDIT | \ + SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ + ARCH_SYSCALL_WORK_ENTER) + +#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ + SYSCALL_WORK_SYSCALL_TRACE | \ + SYSCALL_WORK_SYSCALL_AUDIT | \ + SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ + SYSCALL_WORK_SYSCALL_EXIT_TRAP | \ + ARCH_SYSCALL_WORK_EXIT) + +long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work); + +/** + * syscall_enter_from_user_mode_work - Check and handle work before invoking + * a syscall + * @regs: Pointer to currents pt_regs + * @syscall: The syscall number + * + * Invoked from architecture specific syscall entry code with interrupts + * enabled after invoking enter_from_user_mode(), enabling interrupts and + * extra architecture specific work. + * + * Returns: The original or a modified syscall number + * + * If the returned syscall number is -1 then the syscall should be + * skipped. In this case the caller may invoke syscall_set_error() or + * syscall_set_return_value() first. If neither of those are called and -1 + * is returned, then the syscall will fail with ENOSYS. + * + * It handles the following work items: + * + * 1) syscall_work flag dependent invocations of + * ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter() + * 2) Invocation of audit_syscall_entry() + */ +static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall) +{ + unsigned long work = READ_ONCE(current_thread_info()->syscall_work); + + if (work & SYSCALL_WORK_ENTER) + syscall = syscall_trace_enter(regs, syscall, work); + + return syscall; +} + +/** + * syscall_enter_from_user_mode - Establish state and check and handle work + * before invoking a syscall + * @regs: Pointer to currents pt_regs + * @syscall: The syscall number + * + * Invoked from architecture specific syscall entry code with interrupts + * disabled. The calling code has to be non-instrumentable. When the + * function returns all state is correct, interrupts are enabled and the + * subsequent functions can be instrumented. + * + * This is the combination of enter_from_user_mode() and + * syscall_enter_from_user_mode_work() to be used when there is no + * architecture specific work to be done between the two. + * + * Returns: The original or a modified syscall number. See + * syscall_enter_from_user_mode_work() for further explanation. + */ +static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) +{ + long ret; + + enter_from_user_mode(regs); + + instrumentation_begin(); + local_irq_enable(); + ret = syscall_enter_from_user_mode_work(regs, syscall); + instrumentation_end(); + + return ret; +} + +/** + * syscall_exit_work - Handle work before returning to user mode + * @regs: Pointer to current pt_regs + * @work: Current thread syscall work + * + * Do one-time syscall specific work. + */ +void syscall_exit_work(struct pt_regs *regs, unsigned long work); + +/** + * syscall_exit_to_user_mode_work - Handle work before returning to user mode + * @regs: Pointer to currents pt_regs + * + * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling + * exit_to_user_mode() to perform the final transition to user mode. + * + * Calling convention is the same as for syscall_exit_to_user_mode() and it + * returns with all work handled and interrupts disabled. The caller must + * invoke exit_to_user_mode() before actually switching to user mode to + * make the final state transitions. Interrupts must stay disabled between + * return from this function and the invocation of exit_to_user_mode(). + */ +static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) +{ + unsigned long work = READ_ONCE(current_thread_info()->syscall_work); + unsigned long nr = syscall_get_nr(current, regs); + + CT_WARN_ON(ct_state() != CT_STATE_KERNEL); + + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) + local_irq_enable(); + } + + rseq_debug_syscall_return(regs); + + /* + * Do one-time syscall specific work. If these work items are + * enabled, we want to run them exactly once per syscall exit with + * interrupts enabled. + */ + if (unlikely(work & SYSCALL_WORK_EXIT)) + syscall_exit_work(regs, work); + local_irq_disable_exit_to_user(); + syscall_exit_to_user_mode_prepare(regs); +} + +/** + * syscall_exit_to_user_mode - Handle work before returning to user mode + * @regs: Pointer to currents pt_regs + * + * Invoked with interrupts enabled and fully valid regs. Returns with all + * work handled, interrupts disabled such that the caller can immediately + * switch to user mode. Called from architecture specific syscall and ret + * from fork code. + * + * The call order is: + * 1) One-time syscall exit work: + * - rseq syscall exit + * - audit + * - syscall tracing + * - ptrace (single stepping) + * + * 2) Preparatory work + * - Exit to user mode loop (common TIF handling). Invokes + * arch_exit_to_user_mode_work() for architecture specific TIF work + * - Architecture specific one time work arch_exit_to_user_mode_prepare() + * - Address limit and lockdep checks + * + * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the + * functionality in exit_to_user_mode(). + * + * This is a combination of syscall_exit_to_user_mode_work() (1,2) and + * exit_to_user_mode(). This function is preferred unless there is a + * compelling architectural reason to use the separate functions. + */ +static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs) +{ + instrumentation_begin(); + syscall_exit_to_user_mode_work(regs); + instrumentation_end(); + exit_to_user_mode(); +} + +#endif |
