diff options
Diffstat (limited to 'kernel/entry')
-rw-r--r-- | kernel/entry/Makefile | 3 | ||||
-rw-r--r-- | kernel/entry/common.c | 116 | ||||
-rw-r--r-- | kernel/entry/syscall-common.c | 112 | ||||
-rw-r--r-- | kernel/entry/syscall_user_dispatch.c | 36 |
4 files changed, 140 insertions, 127 deletions
diff --git a/kernel/entry/Makefile b/kernel/entry/Makefile index d4b8bd0af79b..77fcd83dd663 100644 --- a/kernel/entry/Makefile +++ b/kernel/entry/Makefile @@ -12,5 +12,6 @@ ccflags-$(CONFIG_TRACE_BRANCH_PROFILING) += -DDISABLE_BRANCH_PROFILING CFLAGS_REMOVE_common.o = -fstack-protector -fstack-protector-strong CFLAGS_common.o += -fno-stack-protector -obj-$(CONFIG_GENERIC_ENTRY) += common.o syscall_user_dispatch.o +obj-$(CONFIG_GENERIC_IRQ_ENTRY) += common.o +obj-$(CONFIG_GENERIC_SYSCALL) += syscall-common.o syscall_user_dispatch.o obj-$(CONFIG_KVM_XFER_TO_GUEST_WORK) += kvm.o diff --git a/kernel/entry/common.c b/kernel/entry/common.c index a8dd1f27417c..408d28b5179d 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -1,84 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/context_tracking.h> -#include <linux/entry-common.h> +#include <linux/irq-entry-common.h> #include <linux/resume_user_mode.h> #include <linux/highmem.h> #include <linux/jump_label.h> #include <linux/kmsan.h> #include <linux/livepatch.h> -#include <linux/audit.h> #include <linux/tick.h> -#include "common.h" - -#define CREATE_TRACE_POINTS -#include <trace/events/syscalls.h> - -static inline void syscall_enter_audit(struct pt_regs *regs, long syscall) -{ - if (unlikely(audit_context())) { - unsigned long args[6]; - - syscall_get_arguments(current, regs, args); - audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]); - } -} - -long syscall_trace_enter(struct pt_regs *regs, long syscall, - unsigned long work) -{ - long ret = 0; - - /* - * Handle Syscall User Dispatch. This must comes first, since - * the ABI here can be something that doesn't make sense for - * other syscall_work features. - */ - if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { - if (syscall_user_dispatch(regs)) - return -1L; - } - - /* Handle ptrace */ - if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) { - ret = ptrace_report_syscall_entry(regs); - if (ret || (work & SYSCALL_WORK_SYSCALL_EMU)) - return -1L; - } - - /* Do seccomp after ptrace, to catch any tracer changes. */ - if (work & SYSCALL_WORK_SECCOMP) { - ret = __secure_computing(); - if (ret == -1L) - return ret; - } - - /* Either of the above might have changed the syscall number */ - syscall = syscall_get_nr(current, regs); - - if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) { - trace_sys_enter(regs, syscall); - /* - * Probes or BPF hooks in the tracepoint may have changed the - * system call number as well. - */ - syscall = syscall_get_nr(current, regs); - } - - syscall_enter_audit(regs, syscall); - - return ret ? : syscall; -} - -noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs) -{ - enter_from_user_mode(regs); - instrumentation_begin(); - local_irq_enable(); - instrumentation_end(); -} - /* Workaround to allow gradual conversion of architecture code */ void __weak arch_do_signal_or_restart(struct pt_regs *regs) { } @@ -133,46 +62,6 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs, return ti_work; } -/* - * If SYSCALL_EMU is set, then the only reason to report is when - * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall - * instruction has been already reported in syscall_enter_from_user_mode(). - */ -static inline bool report_single_step(unsigned long work) -{ - if (work & SYSCALL_WORK_SYSCALL_EMU) - return false; - - return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP; -} - -void syscall_exit_work(struct pt_regs *regs, unsigned long work) -{ - bool step; - - /* - * If the syscall was rolled back due to syscall user dispatching, - * then the tracers below are not invoked for the same reason as - * the entry side was not invoked in syscall_trace_enter(): The ABI - * of these syscalls is unknown. - */ - if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { - if (unlikely(current->syscall_dispatch.on_dispatch)) { - current->syscall_dispatch.on_dispatch = false; - return; - } - } - - audit_syscall_exit(regs); - - if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT) - trace_sys_exit(regs, syscall_get_return_value(current, regs)); - - step = report_single_step(work); - if (step || work & SYSCALL_WORK_SYSCALL_TRACE) - ptrace_report_syscall_exit(regs, step); -} - noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs) { enter_from_user_mode(regs); @@ -220,7 +109,8 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs) * TINY_RCU does not support EQS, so let the compiler eliminate * this part when enabled. */ - if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) { + if (!IS_ENABLED(CONFIG_TINY_RCU) && + (is_idle_task(current) || arch_in_rcu_eqs())) { /* * If RCU is not watching then the same careful * sequence vs. lockdep and tracing is required diff --git a/kernel/entry/syscall-common.c b/kernel/entry/syscall-common.c new file mode 100644 index 000000000000..66e6ba7fa80c --- /dev/null +++ b/kernel/entry/syscall-common.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/audit.h> +#include <linux/entry-common.h> +#include "common.h" + +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + +static inline void syscall_enter_audit(struct pt_regs *regs, long syscall) +{ + if (unlikely(audit_context())) { + unsigned long args[6]; + + syscall_get_arguments(current, regs, args); + audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]); + } +} + +long syscall_trace_enter(struct pt_regs *regs, long syscall, + unsigned long work) +{ + long ret = 0; + + /* + * Handle Syscall User Dispatch. This must comes first, since + * the ABI here can be something that doesn't make sense for + * other syscall_work features. + */ + if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { + if (syscall_user_dispatch(regs)) + return -1L; + } + + /* Handle ptrace */ + if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) { + ret = ptrace_report_syscall_entry(regs); + if (ret || (work & SYSCALL_WORK_SYSCALL_EMU)) + return -1L; + } + + /* Do seccomp after ptrace, to catch any tracer changes. */ + if (work & SYSCALL_WORK_SECCOMP) { + ret = __secure_computing(); + if (ret == -1L) + return ret; + } + + /* Either of the above might have changed the syscall number */ + syscall = syscall_get_nr(current, regs); + + if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) { + trace_sys_enter(regs, syscall); + /* + * Probes or BPF hooks in the tracepoint may have changed the + * system call number as well. + */ + syscall = syscall_get_nr(current, regs); + } + + syscall_enter_audit(regs, syscall); + + return ret ? : syscall; +} + +noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs) +{ + enter_from_user_mode(regs); + instrumentation_begin(); + local_irq_enable(); + instrumentation_end(); +} + +/* + * If SYSCALL_EMU is set, then the only reason to report is when + * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall + * instruction has been already reported in syscall_enter_from_user_mode(). + */ +static inline bool report_single_step(unsigned long work) +{ + if (work & SYSCALL_WORK_SYSCALL_EMU) + return false; + + return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP; +} + +void syscall_exit_work(struct pt_regs *regs, unsigned long work) +{ + bool step; + + /* + * If the syscall was rolled back due to syscall user dispatching, + * then the tracers below are not invoked for the same reason as + * the entry side was not invoked in syscall_trace_enter(): The ABI + * of these syscalls is unknown. + */ + if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { + if (unlikely(current->syscall_dispatch.on_dispatch)) { + current->syscall_dispatch.on_dispatch = false; + return; + } + } + + audit_syscall_exit(regs); + + if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT) + trace_sys_exit(regs, syscall_get_return_value(current, regs)); + + step = report_single_step(work); + if (step || work & SYSCALL_WORK_SYSCALL_TRACE) + ptrace_report_syscall_exit(regs, step); +} diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c index 5340c5aa89e7..a9055eccb27e 100644 --- a/kernel/entry/syscall_user_dispatch.c +++ b/kernel/entry/syscall_user_dispatch.c @@ -78,7 +78,7 @@ static int task_set_syscall_user_dispatch(struct task_struct *task, unsigned lon if (offset || len || selector) return -EINVAL; break; - case PR_SYS_DISPATCH_ON: + case PR_SYS_DISPATCH_EXCLUSIVE_ON: /* * Validate the direct dispatcher region just for basic * sanity against overflow and a 0-sized dispatcher @@ -87,30 +87,40 @@ static int task_set_syscall_user_dispatch(struct task_struct *task, unsigned lon */ if (offset && offset + len <= offset) return -EINVAL; - + break; + case PR_SYS_DISPATCH_INCLUSIVE_ON: + if (len == 0 || offset + len <= offset) + return -EINVAL; /* - * access_ok() will clear memory tags for tagged addresses - * if current has memory tagging enabled. - - * To enable a tracer to set a tracees selector the - * selector address must be untagged for access_ok(), - * otherwise an untagged tracer will always fail to set a - * tagged tracees selector. + * Invert the range, the check in syscall_user_dispatch() + * supports wrap-around. */ - if (selector && !access_ok(untagged_addr(selector), sizeof(*selector))) - return -EFAULT; - + offset = offset + len; + len = -len; break; default: return -EINVAL; } + /* + * access_ok() will clear memory tags for tagged addresses + * if current has memory tagging enabled. + * + * To enable a tracer to set a tracees selector the + * selector address must be untagged for access_ok(), + * otherwise an untagged tracer will always fail to set a + * tagged tracees selector. + */ + if (mode != PR_SYS_DISPATCH_OFF && selector && + !access_ok(untagged_addr(selector), sizeof(*selector))) + return -EFAULT; + task->syscall_dispatch.selector = selector; task->syscall_dispatch.offset = offset; task->syscall_dispatch.len = len; task->syscall_dispatch.on_dispatch = false; - if (mode == PR_SYS_DISPATCH_ON) + if (mode != PR_SYS_DISPATCH_OFF) set_task_syscall_work(task, SYSCALL_USER_DISPATCH); else clear_task_syscall_work(task, SYSCALL_USER_DISPATCH); |