diff options
Diffstat (limited to 'kernel/ptrace.c')
| -rw-r--r-- | kernel/ptrace.c | 472 |
1 files changed, 285 insertions, 187 deletions
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index eea265082e97..392ec2f75f01 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -32,6 +32,7 @@ #include <linux/compat.h> #include <linux/sched/signal.h> #include <linux/minmax.h> +#include <linux/syscall_user_dispatch.h> #include <asm/syscall.h> /* for syscall_get_* */ @@ -58,7 +59,7 @@ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, return 0; } - ret = __access_remote_vm(mm, addr, buf, len, gup_flags); + ret = access_remote_vm(mm, addr, buf, len, gup_flags); mmput(mm); return ret; @@ -144,20 +145,9 @@ void __ptrace_unlink(struct task_struct *child) */ if (!(child->flags & PF_EXITING) && (child->signal->flags & SIGNAL_STOP_STOPPED || - child->signal->group_stop_count)) { + child->signal->group_stop_count)) child->jobctl |= JOBCTL_STOP_PENDING; - /* - * This is only possible if this thread was cloned by the - * traced task running in the stopped group, set the signal - * for the future reports. - * FIXME: we should change ptrace_init_task() to handle this - * case. - */ - if (!(child->jobctl & JOBCTL_STOP_SIGMASK)) - child->jobctl |= SIGSTOP; - } - /* * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick * @child in the butt. Note that @resume should be used iff @child @@ -185,7 +175,12 @@ static bool looks_like_a_spurious_pid(struct task_struct *task) return true; } -/* Ensure that nothing can wake it up, even SIGKILL */ +/* + * Ensure that nothing can wake it up, even SIGKILL + * + * A task is switched to this state while a ptrace operation is in progress; + * such that the ptrace operation is uninterruptible. + */ static bool ptrace_freeze_traced(struct task_struct *task) { bool ret = false; @@ -197,7 +192,7 @@ static bool ptrace_freeze_traced(struct task_struct *task) spin_lock_irq(&task->sighand->siglock); if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && !__fatal_signal_pending(task)) { - WRITE_ONCE(task->__state, __TASK_TRACED); + task->jobctl |= JOBCTL_PTRACE_FROZEN; ret = true; } spin_unlock_irq(&task->sighand->siglock); @@ -207,23 +202,21 @@ static bool ptrace_freeze_traced(struct task_struct *task) static void ptrace_unfreeze_traced(struct task_struct *task) { - if (READ_ONCE(task->__state) != __TASK_TRACED) - return; - - WARN_ON(!task->ptrace || task->parent != current); + unsigned long flags; /* - * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely. - * Recheck state under the lock to close this race. + * The child may be awake and may have cleared + * JOBCTL_PTRACE_FROZEN (see ptrace_resume). The child will + * not set JOBCTL_PTRACE_FROZEN or enter __TASK_TRACED anew. */ - spin_lock_irq(&task->sighand->siglock); - if (READ_ONCE(task->__state) == __TASK_TRACED) { - if (__fatal_signal_pending(task)) + if (lock_task_sighand(task, &flags)) { + task->jobctl &= ~JOBCTL_PTRACE_FROZEN; + if (__fatal_signal_pending(task)) { + task->jobctl &= ~JOBCTL_TRACED; wake_up_state(task, __TASK_TRACED); - else - WRITE_ONCE(task->__state, TASK_TRACED); + } + unlock_task_sighand(task, &flags); } - spin_unlock_irq(&task->sighand->siglock); } /** @@ -256,7 +249,6 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) */ read_lock(&tasklist_lock); if (child->ptrace && child->parent == current) { - WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); /* * child->sighand can't be NULL, release_task() * does ptrace_unlink() before __exit_signal(). @@ -266,17 +258,9 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) } read_unlock(&tasklist_lock); - if (!ret && !ignore_state) { - if (!wait_task_inactive(child, __TASK_TRACED)) { - /* - * This can only happen if may_ptrace_stop() fails and - * ptrace_stop() changes ->state back to TASK_RUNNING, - * so we should not worry about leaking __TASK_TRACED. - */ - WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); - ret = -ESRCH; - } - } + if (!ret && !ignore_state && + WARN_ON_ONCE(!wait_task_inactive(child, __TASK_TRACED|TASK_FROZEN))) + ret = -ESRCH; return ret; } @@ -371,64 +355,33 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) return !err; } -static int ptrace_attach(struct task_struct *task, long request, - unsigned long addr, - unsigned long flags) +static int check_ptrace_options(unsigned long data) { - bool seize = (request == PTRACE_SEIZE); - int retval; - - retval = -EIO; - if (seize) { - if (addr != 0) - goto out; - if (flags & ~(unsigned long)PTRACE_O_MASK) - goto out; - flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); - } else { - flags = PT_PTRACED; - } - - audit_ptrace(task); - - retval = -EPERM; - if (unlikely(task->flags & PF_KTHREAD)) - goto out; - if (same_thread_group(task, current)) - goto out; - - /* - * Protect exec's credential calculations against our interference; - * SUID, SGID and LSM creds get determined differently - * under ptrace. - */ - retval = -ERESTARTNOINTR; - if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) - goto out; + if (data & ~(unsigned long)PTRACE_O_MASK) + return -EINVAL; - task_lock(task); - retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); - task_unlock(task); - if (retval) - goto unlock_creds; + if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { + if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) || + !IS_ENABLED(CONFIG_SECCOMP)) + return -EINVAL; - write_lock_irq(&tasklist_lock); - retval = -EPERM; - if (unlikely(task->exit_state)) - goto unlock_tasklist; - if (task->ptrace) - goto unlock_tasklist; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; - task->ptrace = flags; + if (seccomp_mode(¤t->seccomp) != SECCOMP_MODE_DISABLED || + current->ptrace & PT_SUSPEND_SECCOMP) + return -EPERM; + } + return 0; +} - ptrace_link(task, current); +static inline void ptrace_set_stopped(struct task_struct *task, bool seize) +{ + guard(spinlock)(&task->sighand->siglock); /* SEIZE doesn't trap tracee on attach */ if (!seize) - send_sig_info(SIGSTOP, SEND_SIG_PRIV, task); - - spin_lock(&task->sighand->siglock); - + send_signal_locked(SIGSTOP, SEND_SIG_PRIV, task, PIDTYPE_PID); /* * If the task is already STOPPED, set JOBCTL_TRAP_STOP and * TRAPPING, and kick it so that it transits to TRACED. TRAPPING @@ -447,30 +400,82 @@ static int ptrace_attach(struct task_struct *task, long request, * in and out of STOPPED are protected by siglock. */ if (task_is_stopped(task) && - task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) + task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) { + task->jobctl &= ~JOBCTL_STOPPED; signal_wake_up_state(task, __TASK_STOPPED); + } +} - spin_unlock(&task->sighand->siglock); +static int ptrace_attach(struct task_struct *task, long request, + unsigned long addr, + unsigned long flags) +{ + bool seize = (request == PTRACE_SEIZE); + int retval; - retval = 0; -unlock_tasklist: - write_unlock_irq(&tasklist_lock); -unlock_creds: - mutex_unlock(&task->signal->cred_guard_mutex); -out: - if (!retval) { + if (seize) { + if (addr != 0) + return -EIO; /* - * We do not bother to change retval or clear JOBCTL_TRAPPING - * if wait_on_bit() was interrupted by SIGKILL. The tracer will - * not return to user-mode, it will exit and clear this bit in - * __ptrace_unlink() if it wasn't already cleared by the tracee; - * and until then nobody can ptrace this task. + * This duplicates the check in check_ptrace_options() because + * ptrace_attach() and ptrace_setoptions() have historically + * used different error codes for unknown ptrace options. */ - wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE); - proc_ptrace_connector(task, PTRACE_ATTACH); + if (flags & ~(unsigned long)PTRACE_O_MASK) + return -EIO; + + retval = check_ptrace_options(flags); + if (retval) + return retval; + flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); + } else { + flags = PT_PTRACED; + } + + audit_ptrace(task); + + if (unlikely(task->flags & PF_KTHREAD)) + return -EPERM; + if (same_thread_group(task, current)) + return -EPERM; + + /* + * Protect exec's credential calculations against our interference; + * SUID, SGID and LSM creds get determined differently + * under ptrace. + */ + scoped_cond_guard (mutex_intr, return -ERESTARTNOINTR, + &task->signal->cred_guard_mutex) { + + scoped_guard (task_lock, task) { + retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); + if (retval) + return retval; + } + + scoped_guard (write_lock_irq, &tasklist_lock) { + if (unlikely(task->exit_state)) + return -EPERM; + if (task->ptrace) + return -EPERM; + + task->ptrace = flags; + ptrace_link(task, current); + ptrace_set_stopped(task, seize); + } } - return retval; + /* + * We do not bother to change retval or clear JOBCTL_TRAPPING + * if wait_on_bit() was interrupted by SIGKILL. The tracer will + * not return to user-mode, it will exit and clear this bit in + * __ptrace_unlink() if it wasn't already cleared by the tracee; + * and until then nobody can ptrace this task. + */ + wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE); + proc_ptrace_connector(task, PTRACE_ATTACH); + + return 0; } /** @@ -654,22 +659,11 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds static int ptrace_setoptions(struct task_struct *child, unsigned long data) { unsigned flags; + int ret; - if (data & ~(unsigned long)PTRACE_O_MASK) - return -EINVAL; - - if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { - if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) || - !IS_ENABLED(CONFIG_SECCOMP)) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (seccomp_mode(¤t->seccomp) != SECCOMP_MODE_DISABLED || - current->ptrace & PT_SUSPEND_SECCOMP) - return -EPERM; - } + ret = check_ptrace_options(data); + if (ret) + return ret; /* Avoid intermediate state when all opts are cleared */ flags = child->ptrace; @@ -799,9 +793,9 @@ static long ptrace_get_rseq_configuration(struct task_struct *task, unsigned long size, void __user *data) { struct ptrace_rseq_configuration conf = { - .rseq_abi_pointer = (u64)(uintptr_t)task->rseq, - .rseq_abi_size = sizeof(*task->rseq), - .signature = task->rseq_sig, + .rseq_abi_pointer = (u64)(uintptr_t)task->rseq.usrptr, + .rseq_abi_size = task->rseq.len, + .signature = task->rseq.sig, .flags = 0, }; @@ -812,11 +806,7 @@ static long ptrace_get_rseq_configuration(struct task_struct *task, } #endif -#ifdef PTRACE_SINGLESTEP #define is_singlestep(request) ((request) == PTRACE_SINGLESTEP) -#else -#define is_singlestep(request) 0 -#endif #ifdef PTRACE_SINGLEBLOCK #define is_singleblock(request) ((request) == PTRACE_SINGLEBLOCK) @@ -833,8 +823,6 @@ static long ptrace_get_rseq_configuration(struct task_struct *task, static int ptrace_resume(struct task_struct *child, long request, unsigned long data) { - bool need_siglock; - if (!valid_signal(data)) return -EIO; @@ -870,18 +858,12 @@ static int ptrace_resume(struct task_struct *child, long request, * Note that we need siglock even if ->exit_code == data and/or this * status was not reported yet, the new status must not be cleared by * wait_task_stopped() after resume. - * - * If data == 0 we do not care if wait_task_stopped() reports the old - * status and clears the code too; this can't race with the tracee, it - * takes siglock after resume. */ - need_siglock = data && !thread_group_empty(current); - if (need_siglock) - spin_lock_irq(&child->sighand->siglock); + spin_lock_irq(&child->sighand->siglock); child->exit_code = data; + child->jobctl &= ~JOBCTL_TRACED; wake_up_state(child, __TASK_TRACED); - if (need_siglock) - spin_unlock_irq(&child->sighand->siglock); + spin_unlock_irq(&child->sighand->siglock); return 0; } @@ -939,7 +921,6 @@ ptrace_get_syscall_info_entry(struct task_struct *child, struct pt_regs *regs, unsigned long args[ARRAY_SIZE(info->entry.args)]; int i; - info->op = PTRACE_SYSCALL_INFO_ENTRY; info->entry.nr = syscall_get_nr(child, regs); syscall_get_arguments(child, regs, args); for (i = 0; i < ARRAY_SIZE(args); i++) @@ -961,10 +942,12 @@ ptrace_get_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs, * diverge significantly enough. */ ptrace_get_syscall_info_entry(child, regs, info); - info->op = PTRACE_SYSCALL_INFO_SECCOMP; info->seccomp.ret_data = child->ptrace_message; - /* ret_data is the last field in struct ptrace_syscall_info.seccomp */ + /* + * ret_data is the last non-reserved field + * in struct ptrace_syscall_info.seccomp + */ return offsetofend(struct ptrace_syscall_info, seccomp.ret_data); } @@ -972,7 +955,6 @@ static unsigned long ptrace_get_syscall_info_exit(struct task_struct *child, struct pt_regs *regs, struct ptrace_syscall_info *info) { - info->op = PTRACE_SYSCALL_INFO_EXIT; info->exit.rval = syscall_get_error(child, regs); info->exit.is_error = !!info->exit.rval; if (!info->exit.is_error) @@ -983,19 +965,8 @@ ptrace_get_syscall_info_exit(struct task_struct *child, struct pt_regs *regs, } static int -ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size, - void __user *datavp) +ptrace_get_syscall_info_op(struct task_struct *child) { - struct pt_regs *regs = task_pt_regs(child); - struct ptrace_syscall_info info = { - .op = PTRACE_SYSCALL_INFO_NONE, - .arch = syscall_get_arch(child), - .instruction_pointer = instruction_pointer(regs), - .stack_pointer = user_stack_pointer(regs), - }; - unsigned long actual_size = offsetof(struct ptrace_syscall_info, entry); - unsigned long write_size; - /* * This does not need lock_task_sighand() to access * child->last_siginfo because ptrace_freeze_traced() @@ -1006,24 +977,160 @@ ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size, case SIGTRAP | 0x80: switch (child->ptrace_message) { case PTRACE_EVENTMSG_SYSCALL_ENTRY: - actual_size = ptrace_get_syscall_info_entry(child, regs, - &info); - break; + return PTRACE_SYSCALL_INFO_ENTRY; case PTRACE_EVENTMSG_SYSCALL_EXIT: - actual_size = ptrace_get_syscall_info_exit(child, regs, - &info); - break; + return PTRACE_SYSCALL_INFO_EXIT; + default: + return PTRACE_SYSCALL_INFO_NONE; } - break; case SIGTRAP | (PTRACE_EVENT_SECCOMP << 8): - actual_size = ptrace_get_syscall_info_seccomp(child, regs, - &info); + return PTRACE_SYSCALL_INFO_SECCOMP; + default: + return PTRACE_SYSCALL_INFO_NONE; + } +} + +static int +ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size, + void __user *datavp) +{ + struct pt_regs *regs = task_pt_regs(child); + struct ptrace_syscall_info info = { + .op = ptrace_get_syscall_info_op(child), + .arch = syscall_get_arch(child), + .instruction_pointer = instruction_pointer(regs), + .stack_pointer = user_stack_pointer(regs), + }; + unsigned long actual_size = offsetof(struct ptrace_syscall_info, entry); + unsigned long write_size; + + switch (info.op) { + case PTRACE_SYSCALL_INFO_ENTRY: + actual_size = ptrace_get_syscall_info_entry(child, regs, &info); + break; + case PTRACE_SYSCALL_INFO_EXIT: + actual_size = ptrace_get_syscall_info_exit(child, regs, &info); + break; + case PTRACE_SYSCALL_INFO_SECCOMP: + actual_size = ptrace_get_syscall_info_seccomp(child, regs, &info); break; } write_size = min(actual_size, user_size); return copy_to_user(datavp, &info, write_size) ? -EFAULT : actual_size; } + +static int +ptrace_set_syscall_info_entry(struct task_struct *child, struct pt_regs *regs, + struct ptrace_syscall_info *info) +{ + unsigned long args[ARRAY_SIZE(info->entry.args)]; + int nr = info->entry.nr; + int i; + + /* + * Check that the syscall number specified in info->entry.nr + * is either a value of type "int" or a sign-extended value + * of type "int". + */ + if (nr != info->entry.nr) + return -ERANGE; + + for (i = 0; i < ARRAY_SIZE(args); i++) { + args[i] = info->entry.args[i]; + /* + * Check that the syscall argument specified in + * info->entry.args[i] is either a value of type + * "unsigned long" or a sign-extended value of type "long". + */ + if (args[i] != info->entry.args[i]) + return -ERANGE; + } + + syscall_set_nr(child, regs, nr); + /* + * If the syscall number is set to -1, setting syscall arguments is not + * just pointless, it would also clobber the syscall return value on + * those architectures that share the same register both for the first + * argument of syscall and its return value. + */ + if (nr != -1) + syscall_set_arguments(child, regs, args); + + return 0; +} + +static int +ptrace_set_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs, + struct ptrace_syscall_info *info) +{ + /* + * info->entry is currently a subset of info->seccomp, + * info->seccomp.ret_data is currently ignored. + */ + return ptrace_set_syscall_info_entry(child, regs, info); +} + +static int +ptrace_set_syscall_info_exit(struct task_struct *child, struct pt_regs *regs, + struct ptrace_syscall_info *info) +{ + long rval = info->exit.rval; + + /* + * Check that the return value specified in info->exit.rval + * is either a value of type "long" or a sign-extended value + * of type "long". + */ + if (rval != info->exit.rval) + return -ERANGE; + + if (info->exit.is_error) + syscall_set_return_value(child, regs, rval, 0); + else + syscall_set_return_value(child, regs, 0, rval); + + return 0; +} + +static int +ptrace_set_syscall_info(struct task_struct *child, unsigned long user_size, + const void __user *datavp) +{ + struct pt_regs *regs = task_pt_regs(child); + struct ptrace_syscall_info info; + + if (user_size < sizeof(info)) + return -EINVAL; + + /* + * The compatibility is tracked by info.op and info.flags: if user-space + * does not instruct us to use unknown extra bits from future versions + * of ptrace_syscall_info, we are not going to read them either. + */ + if (copy_from_user(&info, datavp, sizeof(info))) + return -EFAULT; + + /* Reserved for future use. */ + if (info.flags || info.reserved) + return -EINVAL; + + /* Changing the type of the system call stop is not supported yet. */ + if (ptrace_get_syscall_info_op(child) != info.op) + return -EINVAL; + + switch (info.op) { + case PTRACE_SYSCALL_INFO_ENTRY: + return ptrace_set_syscall_info_entry(child, regs, &info); + case PTRACE_SYSCALL_INFO_EXIT: + return ptrace_set_syscall_info_exit(child, regs, &info); + case PTRACE_SYSCALL_INFO_SECCOMP: + return ptrace_set_syscall_info_seccomp(child, regs, &info); + default: + /* Other types of system call stops are not supported yet. */ + return -EINVAL; + } +} #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ int ptrace_request(struct task_struct *child, long request, @@ -1204,9 +1311,7 @@ int ptrace_request(struct task_struct *child, long request, } #endif -#ifdef PTRACE_SINGLESTEP case PTRACE_SINGLESTEP: -#endif #ifdef PTRACE_SINGLEBLOCK case PTRACE_SINGLEBLOCK: #endif @@ -1219,9 +1324,8 @@ int ptrace_request(struct task_struct *child, long request, return ptrace_resume(child, request, data); case PTRACE_KILL: - if (child->exit_state) /* already dead */ - return 0; - return ptrace_resume(child, request, SIGKILL); + send_sig_info(SIGKILL, SEND_SIG_NOINFO, child); + return 0; #ifdef CONFIG_HAVE_ARCH_TRACEHOOK case PTRACE_GETREGSET: @@ -1245,6 +1349,10 @@ int ptrace_request(struct task_struct *child, long request, case PTRACE_GET_SYSCALL_INFO: ret = ptrace_get_syscall_info(child, addr, datavp); break; + + case PTRACE_SET_SYSCALL_INFO: + ret = ptrace_set_syscall_info(child, addr, datavp); + break; #endif case PTRACE_SECCOMP_GET_FILTER: @@ -1261,6 +1369,14 @@ int ptrace_request(struct task_struct *child, long request, break; #endif + case PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG: + ret = syscall_user_dispatch_set_config(child, addr, datavp); + break; + + case PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG: + ret = syscall_user_dispatch_get_config(child, addr, datavp); + break; + default: break; } @@ -1268,10 +1384,6 @@ int ptrace_request(struct task_struct *child, long request, return ret; } -#ifndef arch_ptrace_attach -#define arch_ptrace_attach(child) do { } while (0) -#endif - SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, unsigned long, data) { @@ -1280,8 +1392,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, if (request == PTRACE_TRACEME) { ret = ptrace_traceme(); - if (!ret) - arch_ptrace_attach(current); goto out; } @@ -1293,12 +1403,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { ret = ptrace_attach(child, request, addr, data); - /* - * Some architectures need to do book-keeping after - * a ptrace attach. - */ - if (!ret) - arch_ptrace_attach(child); goto out_put_task_struct; } @@ -1438,12 +1542,6 @@ COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid, if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { ret = ptrace_attach(child, request, addr, data); - /* - * Some architectures need to do book-keeping after - * a ptrace attach. - */ - if (!ret) - arch_ptrace_attach(child); goto out_put_task_struct; } |
