Diffstat (limited to 'kernel/ptrace.c')
| -rw-r--r-- | kernel/ptrace.c | 855 |
1 file changed, 596 insertions(+), 259 deletions(-)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 4041f5747e73..392ec2f75f01 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/kernel/ptrace.c
  *
@@ -10,6 +11,9 @@
 #include <linux/capability.h>
 #include <linux/export.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/coredump.h>
+#include <linux/sched/task.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
@@ -26,12 +30,49 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/cn_proc.h>
 #include <linux/compat.h>
+#include <linux/sched/signal.h>
+#include <linux/minmax.h>
+#include <linux/syscall_user_dispatch.h>
+#include <asm/syscall.h>	/* for syscall_get_* */
 
-static int ptrace_trapping_sleep_fn(void *flags)
+/*
+ * Access another process' address space via ptrace.
+ * Source/target buffer must be kernel space,
+ * Do not walk the page table directly, use get_user_pages
+ */
+int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
+		     void *buf, int len, unsigned int gup_flags)
 {
-	schedule();
-	return 0;
+	struct mm_struct *mm;
+	int ret;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return 0;
+
+	if (!tsk->ptrace ||
+	    (current != tsk->parent) ||
+	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
+	     !ptracer_capable(tsk, mm->user_ns))) {
+		mmput(mm);
+		return 0;
+	}
+
+	ret = access_remote_vm(mm, addr, buf, len, gup_flags);
+	mmput(mm);
+
+	return ret;
+}
+
+void __ptrace_link(struct task_struct *child, struct task_struct *new_parent,
+		   const struct cred *ptracer_cred)
+{
+	BUG_ON(!list_empty(&child->ptrace_entry));
+	list_add(&child->ptrace_entry, &new_parent->ptraced);
+	child->parent = new_parent;
+	child->ptracer_cred = get_cred(ptracer_cred);
 }
 
 /*
@@ -40,11 +81,9 @@ static int ptrace_trapping_sleep_fn(void *flags)
  *
  * Must be called with the tasklist lock write-held.
  */
-void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
+static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
 {
-	BUG_ON(!list_empty(&child->ptrace_entry));
-	list_add(&child->ptrace_entry, &new_parent->ptraced);
-	child->parent = new_parent;
+	__ptrace_link(child, new_parent, current_cred());
 }
 
 /**
@@ -77,14 +116,22 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
  */
 void __ptrace_unlink(struct task_struct *child)
 {
+	const struct cred *old_cred;
 	BUG_ON(!child->ptrace);
 
-	child->ptrace = 0;
+	clear_task_syscall_work(child, SYSCALL_TRACE);
+#if defined(CONFIG_GENERIC_ENTRY) || defined(TIF_SYSCALL_EMU)
+	clear_task_syscall_work(child, SYSCALL_EMU);
+#endif
+
 	child->parent = child->real_parent;
 	list_del_init(&child->ptrace_entry);
+	old_cred = child->ptracer_cred;
+	child->ptracer_cred = NULL;
+	put_cred(old_cred);
 
 	spin_lock(&child->sighand->siglock);
-
+	child->ptrace = 0;
 	/*
 	 * Clear all pending traps and TRAPPING.  TRAPPING should be
 	 * cleared regardless of JOBCTL_STOP_PENDING.  Do it explicitly.
@@ -98,20 +145,9 @@ void __ptrace_unlink(struct task_struct *child)
 	 */
 	if (!(child->flags & PF_EXITING) &&
 	    (child->signal->flags & SIGNAL_STOP_STOPPED ||
-	     child->signal->group_stop_count)) {
+	     child->signal->group_stop_count))
 		child->jobctl |= JOBCTL_STOP_PENDING;
-		/*
-		 * This is only possible if this thread was cloned by the
-		 * traced task running in the stopped group, set the signal
-		 * for the future reports.
-		 * FIXME: we should change ptrace_init_task() to handle this
-		 * case.
-		 */
-		if (!(child->jobctl & JOBCTL_STOP_SIGMASK))
-			child->jobctl |= SIGSTOP;
-	}
-
 	/*
 	 * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick
 	 * @child in the butt.  Note that @resume should be used iff @child
@@ -124,7 +160,27 @@ void __ptrace_unlink(struct task_struct *child)
 	spin_unlock(&child->sighand->siglock);
 }
 
-/* Ensure that nothing can wake it up, even SIGKILL */
+static bool looks_like_a_spurious_pid(struct task_struct *task)
+{
+	if (task->exit_code != ((PTRACE_EVENT_EXEC << 8) | SIGTRAP))
+		return false;
+
+	if (task_pid_vnr(task) == task->ptrace_message)
+		return false;
+	/*
+	 * The tracee changed its pid but the PTRACE_EVENT_EXEC event
+	 * was not wait()'ed, most probably debugger targets the old
+	 * leader which was destroyed in de_thread().
+	 */
+	return true;
+}
+
+/*
+ * Ensure that nothing can wake it up, even SIGKILL
+ *
+ * A task is switched to this state while a ptrace operation is in progress;
+ * such that the ptrace operation is uninterruptible.
+ */
 static bool ptrace_freeze_traced(struct task_struct *task)
 {
 	bool ret = false;
@@ -134,8 +190,9 @@ static bool ptrace_freeze_traced(struct task_struct *task)
 		return ret;
 
 	spin_lock_irq(&task->sighand->siglock);
-	if (task_is_traced(task) && !__fatal_signal_pending(task)) {
-		task->state = __TASK_TRACED;
+	if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
+	    !__fatal_signal_pending(task)) {
+		task->jobctl |= JOBCTL_PTRACE_FROZEN;
 		ret = true;
 	}
 	spin_unlock_irq(&task->sighand->siglock);
@@ -145,17 +202,21 @@ static bool ptrace_freeze_traced(struct task_struct *task)
 
 static void ptrace_unfreeze_traced(struct task_struct *task)
 {
-	if (task->state != __TASK_TRACED)
-		return;
-
-	WARN_ON(!task->ptrace || task->parent != current);
+	unsigned long flags;
 
-	spin_lock_irq(&task->sighand->siglock);
-	if (__fatal_signal_pending(task))
-		wake_up_state(task, __TASK_TRACED);
-	else
-		task->state = TASK_TRACED;
-	spin_unlock_irq(&task->sighand->siglock);
+	/*
+	 * The child may be awake and may have cleared
+	 * JOBCTL_PTRACE_FROZEN (see ptrace_resume).  The child will
+	 * not set JOBCTL_PTRACE_FROZEN or enter __TASK_TRACED anew.
+	 */
+	if (lock_task_sighand(task, &flags)) {
+		task->jobctl &= ~JOBCTL_PTRACE_FROZEN;
+		if (__fatal_signal_pending(task)) {
+			task->jobctl &= ~JOBCTL_TRACED;
+			wake_up_state(task, __TASK_TRACED);
+		}
+		unlock_task_sighand(task, &flags);
+	}
 }
 
 /**
@@ -188,7 +249,6 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 	 */
 	read_lock(&tasklist_lock);
 	if (child->ptrace && child->parent == current) {
-		WARN_ON(child->state == __TASK_TRACED);
 		/*
 		 * child->sighand can't be NULL, release_task()
 		 * does ptrace_unlink() before __exit_signal().
@@ -198,33 +258,32 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 	}
 	read_unlock(&tasklist_lock);
 
-	if (!ret && !ignore_state) {
-		if (!wait_task_inactive(child, __TASK_TRACED)) {
-			/*
-			 * This can only happen if may_ptrace_stop() fails and
-			 * ptrace_stop() changes ->state back to TASK_RUNNING,
-			 * so we should not worry about leaking __TASK_TRACED.
-			 */
-			WARN_ON(child->state == __TASK_TRACED);
-			ret = -ESRCH;
-		}
-	}
+	if (!ret && !ignore_state &&
+	    WARN_ON_ONCE(!wait_task_inactive(child, __TASK_TRACED|TASK_FROZEN)))
+		ret = -ESRCH;
 
 	return ret;
 }
 
-static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
+static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
 {
 	if (mode & PTRACE_MODE_NOAUDIT)
-		return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE);
-	else
-		return has_ns_capability(current, ns, CAP_SYS_PTRACE);
+		return ns_capable_noaudit(ns, CAP_SYS_PTRACE);
+	return ns_capable(ns, CAP_SYS_PTRACE);
 }
 
 /* Returns 0 on success, -errno on denial. */
 static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 {
 	const struct cred *cred = current_cred(), *tcred;
+	struct mm_struct *mm;
+	kuid_t caller_uid;
+	kgid_t caller_gid;
+
+	if (!(mode & PTRACE_MODE_FSCREDS) == !(mode & PTRACE_MODE_REALCREDS)) {
+		WARN(1, "denying ptrace access check without PTRACE_MODE_*CREDS\n");
+		return -EPERM;
+	}
 
 	/* May we inspect the given task?
 	 * This check is used both for attaching with ptrace
@@ -234,18 +293,33 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 	 * because setting up the necessary parent/child relationship
 	 * or halting the specified task is impossible.
 	 */
-	int dumpable = 0;
+
 	/* Don't let security modules deny introspection */
-	if (task == current)
+	if (same_thread_group(task, current))
		return 0;
 	rcu_read_lock();
+	if (mode & PTRACE_MODE_FSCREDS) {
+		caller_uid = cred->fsuid;
+		caller_gid = cred->fsgid;
+	} else {
+		/*
+		 * Using the euid would make more sense here, but something
+		 * in userland might rely on the old behavior, and this
+		 * shouldn't be a security problem since
+		 * PTRACE_MODE_REALCREDS implies that the caller explicitly
+		 * used a syscall that requests access to another process
+		 * (and not a filesystem syscall to procfs).
+		 */
+		caller_uid = cred->uid;
+		caller_gid = cred->gid;
+	}
 	tcred = __task_cred(task);
-	if (uid_eq(cred->uid, tcred->euid) &&
-	    uid_eq(cred->uid, tcred->suid) &&
-	    uid_eq(cred->uid, tcred->uid) &&
-	    gid_eq(cred->gid, tcred->egid) &&
-	    gid_eq(cred->gid, tcred->sgid) &&
-	    gid_eq(cred->gid, tcred->gid))
+	if (uid_eq(caller_uid, tcred->euid) &&
+	    uid_eq(caller_uid, tcred->suid) &&
+	    uid_eq(caller_uid, tcred->uid) &&
+	    gid_eq(caller_gid, tcred->egid) &&
+	    gid_eq(caller_gid, tcred->sgid) &&
+	    gid_eq(caller_gid, tcred->gid))
 		goto ok;
 	if (ptrace_has_cap(tcred->user_ns, mode))
 		goto ok;
@@ -253,15 +327,21 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 	return -EPERM;
 ok:
 	rcu_read_unlock();
+	/*
+	 * If a task drops privileges and becomes nondumpable (through a syscall
+	 * like setresuid()) while we are trying to access it, we must ensure
+	 * that the dumpability is read after the credentials; otherwise,
+	 * we may be able to attach to a task that we shouldn't be able to
+	 * attach to (as if the task had dropped privileges without becoming
+	 * nondumpable).
+	 * Pairs with a write barrier in commit_creds().
+	 */
 	smp_rmb();
-	if (task->mm)
-		dumpable = get_dumpable(task->mm);
-	rcu_read_lock();
-	if (!dumpable && !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
-		rcu_read_unlock();
-		return -EPERM;
-	}
-	rcu_read_unlock();
+	mm = task->mm;
+	if (mm &&
+	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
+	     !ptrace_has_cap(mm->user_ns, mode)))
+		return -EPERM;
 
 	return security_ptrace_access_check(task, mode);
 }
@@ -275,6 +355,57 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
 	return !err;
 }
 
+static int check_ptrace_options(unsigned long data)
+{
+	if (data & ~(unsigned long)PTRACE_O_MASK)
+		return -EINVAL;
+
+	if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
+		if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) ||
+		    !IS_ENABLED(CONFIG_SECCOMP))
+			return -EINVAL;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (seccomp_mode(&current->seccomp) != SECCOMP_MODE_DISABLED ||
+		    current->ptrace & PT_SUSPEND_SECCOMP)
+			return -EPERM;
+	}
+	return 0;
+}
+
+static inline void ptrace_set_stopped(struct task_struct *task, bool seize)
+{
+	guard(spinlock)(&task->sighand->siglock);
+
+	/* SEIZE doesn't trap tracee on attach */
+	if (!seize)
+		send_signal_locked(SIGSTOP, SEND_SIG_PRIV, task, PIDTYPE_PID);
+	/*
+	 * If the task is already STOPPED, set JOBCTL_TRAP_STOP and
+	 * TRAPPING, and kick it so that it transits to TRACED.  TRAPPING
+	 * will be cleared if the child completes the transition or any
+	 * event which clears the group stop states happens.  We'll wait
+	 * for the transition to complete before returning from this
	 * function.
+	 *
+	 * This hides STOPPED -> RUNNING -> TRACED transition from the
+	 * attaching thread but a different thread in the same group can
+	 * still observe the transient RUNNING state.  IOW, if another
+	 * thread's WNOHANG wait(2) on the stopped tracee races against
+	 * ATTACH, the wait(2) may fail due to the transient RUNNING.
+	 *
+	 * The following task_is_stopped() test is safe as both transitions
+	 * in and out of STOPPED are protected by siglock.
+	 */
+	if (task_is_stopped(task) &&
+	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) {
+		task->jobctl &= ~JOBCTL_STOPPED;
+		signal_wake_up_state(task, __TASK_STOPPED);
+	}
+}
+
 static int ptrace_attach(struct task_struct *task, long request,
 			 unsigned long addr,
 			 unsigned long flags)
@@ -282,12 +413,20 @@ static int ptrace_attach(struct task_struct *task, long request,
 	bool seize = (request == PTRACE_SEIZE);
 	int retval;
 
-	retval = -EIO;
 	if (seize) {
 		if (addr != 0)
-			goto out;
+			return -EIO;
+		/*
+		 * This duplicates the check in check_ptrace_options() because
+		 * ptrace_attach() and ptrace_setoptions() have historically
+		 * used different error codes for unknown ptrace options.
+		 */
 		if (flags & ~(unsigned long)PTRACE_O_MASK)
-			goto out;
+			return -EIO;
+
+		retval = check_ptrace_options(flags);
+		if (retval)
+			return retval;
 		flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT);
 	} else {
 		flags = PT_PTRACED;
 	}
@@ -295,86 +434,48 @@ static int ptrace_attach(struct task_struct *task, long request,
 
 	audit_ptrace(task);
 
-	retval = -EPERM;
 	if (unlikely(task->flags & PF_KTHREAD))
-		goto out;
+		return -EPERM;
 	if (same_thread_group(task, current))
-		goto out;
+		return -EPERM;
 
 	/*
 	 * Protect exec's credential calculations against our interference;
 	 * SUID, SGID and LSM creds get determined differently
 	 * under ptrace.
 	 */
-	retval = -ERESTARTNOINTR;
-	if (mutex_lock_interruptible(&task->signal->cred_guard_mutex))
-		goto out;
-
-	task_lock(task);
-	retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH);
-	task_unlock(task);
-	if (retval)
-		goto unlock_creds;
-
-	write_lock_irq(&tasklist_lock);
-	retval = -EPERM;
-	if (unlikely(task->exit_state))
-		goto unlock_tasklist;
-	if (task->ptrace)
-		goto unlock_tasklist;
-
-	if (seize)
-		flags |= PT_SEIZED;
-	rcu_read_lock();
-	if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
-		flags |= PT_PTRACE_CAP;
-	rcu_read_unlock();
-	task->ptrace = flags;
+	scoped_cond_guard (mutex_intr, return -ERESTARTNOINTR,
+			   &task->signal->cred_guard_mutex) {
 
-	__ptrace_link(task, current);
+		scoped_guard (task_lock, task) {
+			retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS);
+			if (retval)
+				return retval;
+		}
 
-	/* SEIZE doesn't trap tracee on attach */
-	if (!seize)
-		send_sig_info(SIGSTOP, SEND_SIG_FORCED, task);
+		scoped_guard (write_lock_irq, &tasklist_lock) {
+			if (unlikely(task->exit_state))
+				return -EPERM;
+			if (task->ptrace)
+				return -EPERM;
 
-	spin_lock(&task->sighand->siglock);
+			task->ptrace = flags;
+			ptrace_link(task, current);
+			ptrace_set_stopped(task, seize);
+		}
+	}
 
 	/*
-	 * If the task is already STOPPED, set JOBCTL_TRAP_STOP and
-	 * TRAPPING, and kick it so that it transits to TRACED.  TRAPPING
-	 * will be cleared if the child completes the transition or any
-	 * event which clears the group stop states happens.  We'll wait
-	 * for the transition to complete before returning from this
-	 * function.
-	 *
-	 * This hides STOPPED -> RUNNING -> TRACED transition from the
-	 * attaching thread but a different thread in the same group can
-	 * still observe the transient RUNNING state.  IOW, if another
-	 * thread's WNOHANG wait(2) on the stopped tracee races against
-	 * ATTACH, the wait(2) may fail due to the transient RUNNING.
-	 *
-	 * The following task_is_stopped() test is safe as both transitions
-	 * in and out of STOPPED are protected by siglock.
+	 * We do not bother to change retval or clear JOBCTL_TRAPPING
+	 * if wait_on_bit() was interrupted by SIGKILL. The tracer will
+	 * not return to user-mode, it will exit and clear this bit in
+	 * __ptrace_unlink() if it wasn't already cleared by the tracee;
+	 * and until then nobody can ptrace this task.
 	 */
-	if (task_is_stopped(task) &&
-	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
-		signal_wake_up_state(task, __TASK_STOPPED);
+	wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE);
+	proc_ptrace_connector(task, PTRACE_ATTACH);
 
-	spin_unlock(&task->sighand->siglock);
-
-	retval = 0;
-unlock_tasklist:
-	write_unlock_irq(&tasklist_lock);
-unlock_creds:
-	mutex_unlock(&task->signal->cred_guard_mutex);
-out:
-	if (!retval) {
-		wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT,
-			    ptrace_trapping_sleep_fn, TASK_UNINTERRUPTIBLE);
-		proc_ptrace_connector(task, PTRACE_ATTACH);
-	}
-
-	return retval;
+	return 0;
 }
 
 /**
@@ -398,7 +499,7 @@ static int ptrace_traceme(void)
 	 */
 	if (!ret && !(current->real_parent->flags & PF_EXITING)) {
 		current->ptrace = PT_PTRACED;
-		__ptrace_link(current, current->real_parent);
+		ptrace_link(current, current->real_parent);
 	}
 	}
 	write_unlock_irq(&tasklist_lock);
@@ -461,66 +562,46 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
 
 static int ptrace_detach(struct task_struct *child, unsigned int data)
 {
-	bool dead = false;
-
 	if (!valid_signal(data))
 		return -EIO;
 
 	/* Architecture-specific hardware disable .. */
 	ptrace_disable(child);
-	clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-	flush_ptrace_hw_breakpoint(child);
 
 	write_lock_irq(&tasklist_lock);
 	/*
-	 * This child can be already killed. Make sure de_thread() or
-	 * our sub-thread doing do_wait() didn't do release_task() yet.
+	 * We rely on ptrace_freeze_traced(). It can't be killed and
+	 * untraced by another thread, it can't be a zombie.
 	 */
-	if (child->ptrace) {
-		child->exit_code = data;
-		dead = __ptrace_detach(current, child);
-	}
+	WARN_ON(!child->ptrace || child->exit_state);
+	/*
+	 * tasklist_lock avoids the race with wait_task_stopped(), see
+	 * the comment in ptrace_resume().
+	 */
+	child->exit_code = data;
+	__ptrace_detach(current, child);
 	write_unlock_irq(&tasklist_lock);
 
 	proc_ptrace_connector(child, PTRACE_DETACH);
-	if (unlikely(dead))
-		release_task(child);
 
 	return 0;
 }
 
 /*
 * Detach all tasks we were using ptrace on. Called with tasklist held
- * for writing, and returns with it held too. But note it can release
- * and reacquire the lock.
+ * for writing.
 */
-void exit_ptrace(struct task_struct *tracer)
-	__releases(&tasklist_lock)
-	__acquires(&tasklist_lock)
+void exit_ptrace(struct task_struct *tracer, struct list_head *dead)
 {
 	struct task_struct *p, *n;
-	LIST_HEAD(ptrace_dead);
-
-	if (likely(list_empty(&tracer->ptraced)))
-		return;
 
 	list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) {
 		if (unlikely(p->ptrace & PT_EXITKILL))
-			send_sig_info(SIGKILL, SEND_SIG_FORCED, p);
+			send_sig_info(SIGKILL, SEND_SIG_PRIV, p);
 
 		if (__ptrace_detach(tracer, p))
-			list_add(&p->ptrace_entry, &ptrace_dead);
-	}
-
-	write_unlock_irq(&tasklist_lock);
-	BUG_ON(!list_empty(&tracer->ptraced));
-
-	list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_entry) {
-		list_del_init(&p->ptrace_entry);
-		release_task(p);
+			list_add(&p->ptrace_entry, dead);
 	}
-
-	write_lock_irq(&tasklist_lock);
 }
 
 int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len)
@@ -532,7 +613,8 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst
 		int this_len, retval;
 
 		this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
-		retval = access_process_vm(tsk, src, buf, this_len, 0);
+		retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE);
+
 		if (!retval) {
 			if (copied)
 				break;
@@ -559,7 +641,8 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
 		this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
 		if (copy_from_user(buf, src, this_len))
 			return -EFAULT;
-		retval = access_process_vm(tsk, dst, buf, this_len, 1);
+		retval = ptrace_access_vm(tsk, dst, buf, this_len,
+					  FOLL_FORCE | FOLL_WRITE);
 		if (!retval) {
 			if (copied)
 				break;
@@ -576,9 +659,11 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
 static int ptrace_setoptions(struct task_struct *child, unsigned long data)
 {
 	unsigned flags;
+	int ret;
 
-	if (data & ~(unsigned long)PTRACE_O_MASK)
-		return -EINVAL;
+	ret = check_ptrace_options(data);
+	if (ret)
+		return ret;
 
 	/* Avoid intermediate state when all opts are cleared */
 	flags = child->ptrace;
@@ -589,7 +674,7 @@ static int ptrace_setoptions(struct task_struct *child, unsigned long data)
 	return 0;
 }
 
-static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info)
+static int ptrace_getsiginfo(struct task_struct *child, kernel_siginfo_t *info)
 {
 	unsigned long flags;
 	int error = -ESRCH;
@@ -597,7 +682,7 @@ static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info)
 	if (lock_task_sighand(child, &flags)) {
 		error = -EINVAL;
 		if (likely(child->last_siginfo != NULL)) {
-			*info = *child->last_siginfo;
+			copy_siginfo(info, child->last_siginfo);
 			error = 0;
 		}
 		unlock_task_sighand(child, &flags);
@@ -605,7 +690,7 @@ static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info)
 	return error;
 }
 
-static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info)
+static int ptrace_setsiginfo(struct task_struct *child, const kernel_siginfo_t *info)
 {
 	unsigned long flags;
 	int error = -ESRCH;
@@ -613,7 +698,7 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info)
 	if (lock_task_sighand(child, &flags)) {
 		error = -EINVAL;
 		if (likely(child->last_siginfo != NULL)) {
-			*child->last_siginfo = *info;
+			copy_siginfo(child->last_siginfo, info);
 			error = 0;
 		}
 		unlock_task_sighand(child, &flags);
@@ -641,33 +726,38 @@ static int ptrace_peek_siginfo(struct task_struct *child,
 	if (arg.nr < 0)
 		return -EINVAL;
 
+	/* Ensure arg.off fits in an unsigned long */
+	if (arg.off > ULONG_MAX)
+		return 0;
+
 	if (arg.flags & PTRACE_PEEKSIGINFO_SHARED)
 		pending = &child->signal->shared_pending;
 	else
 		pending = &child->pending;
 
 	for (i = 0; i < arg.nr; ) {
-		siginfo_t info;
-		s32 off = arg.off + i;
+		kernel_siginfo_t info;
+		unsigned long off = arg.off + i;
+		bool found = false;
 
 		spin_lock_irq(&child->sighand->siglock);
 		list_for_each_entry(q, &pending->list, list) {
 			if (!off--) {
+				found = true;
 				copy_siginfo(&info, &q->info);
 				break;
 			}
 		}
 		spin_unlock_irq(&child->sighand->siglock);
 
-		if (off >= 0) /* beyond the end of the list */
+		if (!found) /* beyond the end of the list */
 			break;
 
 #ifdef CONFIG_COMPAT
-		if (unlikely(is_compat_task())) {
+		if (unlikely(in_compat_syscall())) {
 			compat_siginfo_t __user *uinfo = compat_ptr(data);
 
-			if (copy_siginfo_to_user32(uinfo, &info) ||
-			    __put_user(info.si_code, &uinfo->si_code)) {
+			if (copy_siginfo_to_user32(uinfo, &info)) {
 				ret = -EFAULT;
 				break;
			}
@@ -677,8 +767,7 @@ static int ptrace_peek_siginfo(struct task_struct *child,
 		{
 			siginfo_t __user *uinfo = (siginfo_t __user *) data;
 
-			if (copy_siginfo_to_user(uinfo, &info) ||
-			    __put_user(info.si_code, &uinfo->si_code)) {
+			if (copy_siginfo_to_user(uinfo, &info)) {
 				ret = -EFAULT;
 				break;
 			}
@@ -699,12 +788,26 @@ static int ptrace_peek_siginfo(struct task_struct *child,
 	return ret;
 }
 
-#ifdef PTRACE_SINGLESTEP
-#define is_singlestep(request)		((request) == PTRACE_SINGLESTEP)
-#else
-#define is_singlestep(request)		0
+#ifdef CONFIG_RSEQ
+static long ptrace_get_rseq_configuration(struct task_struct *task,
+					  unsigned long size, void __user *data)
+{
+	struct ptrace_rseq_configuration conf = {
+		.rseq_abi_pointer = (u64)(uintptr_t)task->rseq.usrptr,
+		.rseq_abi_size = task->rseq.len,
+		.signature = task->rseq.sig,
+		.flags = 0,
+	};
+
+	size = min_t(unsigned long, size, sizeof(conf));
+	if (copy_to_user(data, &conf, size))
+		return -EFAULT;
+	return sizeof(conf);
+}
 #endif
 
+#define is_singlestep(request)		((request) == PTRACE_SINGLESTEP)
+
 #ifdef PTRACE_SINGLEBLOCK
 #define is_singleblock(request)	((request) == PTRACE_SINGLEBLOCK)
 #else
@@ -724,15 +827,15 @@ static int ptrace_resume(struct task_struct *child, long request,
 		return -EIO;
 
 	if (request == PTRACE_SYSCALL)
-		set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		set_task_syscall_work(child, SYSCALL_TRACE);
 	else
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		clear_task_syscall_work(child, SYSCALL_TRACE);
 
-#ifdef TIF_SYSCALL_EMU
+#if defined(CONFIG_GENERIC_ENTRY) || defined(TIF_SYSCALL_EMU)
 	if (request == PTRACE_SYSEMU || request == PTRACE_SYSEMU_SINGLESTEP)
-		set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+		set_task_syscall_work(child, SYSCALL_EMU);
 	else
-		clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+		clear_task_syscall_work(child, SYSCALL_EMU);
 #endif
 
 	if (is_singleblock(request)) {
@@ -747,8 +850,20 @@ static int ptrace_resume(struct task_struct *child, long request,
 		user_disable_single_step(child);
 	}
 
+	/*
+	 * Change ->exit_code and ->state under siglock to avoid the race
+	 * with wait_task_stopped() in between; a non-zero ->exit_code will
+	 * wrongly look like another report from tracee.
+	 *
+	 * Note that we need siglock even if ->exit_code == data and/or this
+	 * status was not reported yet, the new status must not be cleared by
+	 * wait_task_stopped() after resume.
+	 */
+	spin_lock_irq(&child->sighand->siglock);
 	child->exit_code = data;
+	child->jobctl &= ~JOBCTL_TRACED;
 	wake_up_state(child, __TASK_TRACED);
+	spin_unlock_irq(&child->sighand->siglock);
 
 	return 0;
 }
@@ -798,14 +913,232 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type,
 * to ensure no machine forgets it.
 */
 EXPORT_SYMBOL_GPL(task_user_regset_view);
-#endif
+
+static unsigned long
+ptrace_get_syscall_info_entry(struct task_struct *child, struct pt_regs *regs,
+			      struct ptrace_syscall_info *info)
+{
+	unsigned long args[ARRAY_SIZE(info->entry.args)];
+	int i;
+
+	info->entry.nr = syscall_get_nr(child, regs);
+	syscall_get_arguments(child, regs, args);
+	for (i = 0; i < ARRAY_SIZE(args); i++)
+		info->entry.args[i] = args[i];
+
+	/* args is the last field in struct ptrace_syscall_info.entry */
+	return offsetofend(struct ptrace_syscall_info, entry.args);
+}
+
+static unsigned long
+ptrace_get_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs,
+				struct ptrace_syscall_info *info)
+{
+	/*
+	 * As struct ptrace_syscall_info.entry is currently a subset
+	 * of struct ptrace_syscall_info.seccomp, it makes sense to
+	 * initialize that subset using ptrace_get_syscall_info_entry().
+	 * This can be reconsidered in the future if these structures
+	 * diverge significantly enough.
+	 */
+	ptrace_get_syscall_info_entry(child, regs, info);
+	info->seccomp.ret_data = child->ptrace_message;
+
+	/*
+	 * ret_data is the last non-reserved field
+	 * in struct ptrace_syscall_info.seccomp
+	 */
+	return offsetofend(struct ptrace_syscall_info, seccomp.ret_data);
+}
+
+static unsigned long
+ptrace_get_syscall_info_exit(struct task_struct *child, struct pt_regs *regs,
+			     struct ptrace_syscall_info *info)
+{
+	info->exit.rval = syscall_get_error(child, regs);
+	info->exit.is_error = !!info->exit.rval;
+	if (!info->exit.is_error)
+		info->exit.rval = syscall_get_return_value(child, regs);
+
+	/* is_error is the last field in struct ptrace_syscall_info.exit */
+	return offsetofend(struct ptrace_syscall_info, exit.is_error);
+}
+
+static int
+ptrace_get_syscall_info_op(struct task_struct *child)
+{
+	/*
+	 * This does not need lock_task_sighand() to access
+	 * child->last_siginfo because ptrace_freeze_traced()
+	 * called earlier by ptrace_check_attach() ensures that
+	 * the tracee cannot go away and clear its last_siginfo.
+	 */
+	switch (child->last_siginfo ? child->last_siginfo->si_code : 0) {
+	case SIGTRAP | 0x80:
+		switch (child->ptrace_message) {
+		case PTRACE_EVENTMSG_SYSCALL_ENTRY:
+			return PTRACE_SYSCALL_INFO_ENTRY;
+		case PTRACE_EVENTMSG_SYSCALL_EXIT:
+			return PTRACE_SYSCALL_INFO_EXIT;
+		default:
+			return PTRACE_SYSCALL_INFO_NONE;
+		}
+	case SIGTRAP | (PTRACE_EVENT_SECCOMP << 8):
+		return PTRACE_SYSCALL_INFO_SECCOMP;
+	default:
+		return PTRACE_SYSCALL_INFO_NONE;
+	}
+}
+
+static int
+ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size,
+			void __user *datavp)
+{
+	struct pt_regs *regs = task_pt_regs(child);
+	struct ptrace_syscall_info info = {
+		.op = ptrace_get_syscall_info_op(child),
+		.arch = syscall_get_arch(child),
+		.instruction_pointer = instruction_pointer(regs),
+		.stack_pointer = user_stack_pointer(regs),
+	};
+	unsigned long actual_size = offsetof(struct ptrace_syscall_info, entry);
+	unsigned long write_size;
+
+	switch (info.op) {
+	case PTRACE_SYSCALL_INFO_ENTRY:
+		actual_size = ptrace_get_syscall_info_entry(child, regs, &info);
+		break;
+	case PTRACE_SYSCALL_INFO_EXIT:
+		actual_size = ptrace_get_syscall_info_exit(child, regs, &info);
+		break;
+	case PTRACE_SYSCALL_INFO_SECCOMP:
+		actual_size = ptrace_get_syscall_info_seccomp(child, regs, &info);
+		break;
+	}
+
+	write_size = min(actual_size, user_size);
+	return copy_to_user(datavp, &info, write_size) ? -EFAULT : actual_size;
+}
+
+static int
+ptrace_set_syscall_info_entry(struct task_struct *child, struct pt_regs *regs,
+			      struct ptrace_syscall_info *info)
+{
+	unsigned long args[ARRAY_SIZE(info->entry.args)];
+	int nr = info->entry.nr;
+	int i;
+
+	/*
+	 * Check that the syscall number specified in info->entry.nr
+	 * is either a value of type "int" or a sign-extended value
+	 * of type "int".
+	 */
+	if (nr != info->entry.nr)
+		return -ERANGE;
+
+	for (i = 0; i < ARRAY_SIZE(args); i++) {
+		args[i] = info->entry.args[i];
+		/*
+		 * Check that the syscall argument specified in
+		 * info->entry.args[i] is either a value of type
+		 * "unsigned long" or a sign-extended value of type "long".
+		 */
+		if (args[i] != info->entry.args[i])
+			return -ERANGE;
+	}
+
+	syscall_set_nr(child, regs, nr);
+	/*
+	 * If the syscall number is set to -1, setting syscall arguments is not
+	 * just pointless, it would also clobber the syscall return value on
+	 * those architectures that share the same register both for the first
+	 * argument of syscall and its return value.
+	 */
+	if (nr != -1)
+		syscall_set_arguments(child, regs, args);
+
+	return 0;
+}
+
+static int
+ptrace_set_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs,
+				struct ptrace_syscall_info *info)
+{
+	/*
+	 * info->entry is currently a subset of info->seccomp,
+	 * info->seccomp.ret_data is currently ignored.
+	 */
+	return ptrace_set_syscall_info_entry(child, regs, info);
+}
+
+static int
+ptrace_set_syscall_info_exit(struct task_struct *child, struct pt_regs *regs,
+			     struct ptrace_syscall_info *info)
+{
+	long rval = info->exit.rval;
+
+	/*
+	 * Check that the return value specified in info->exit.rval
+	 * is either a value of type "long" or a sign-extended value
+	 * of type "long".
+	 */
+	if (rval != info->exit.rval)
+		return -ERANGE;
+
+	if (info->exit.is_error)
+		syscall_set_return_value(child, regs, rval, 0);
+	else
+		syscall_set_return_value(child, regs, 0, rval);
+
+	return 0;
+}
+
+static int
+ptrace_set_syscall_info(struct task_struct *child, unsigned long user_size,
+			const void __user *datavp)
+{
+	struct pt_regs *regs = task_pt_regs(child);
+	struct ptrace_syscall_info info;
+
+	if (user_size < sizeof(info))
+		return -EINVAL;
+
+	/*
+	 * The compatibility is tracked by info.op and info.flags: if user-space
+	 * does not instruct us to use unknown extra bits from future versions
+	 * of ptrace_syscall_info, we are not going to read them either.
+	 */
+	if (copy_from_user(&info, datavp, sizeof(info)))
+		return -EFAULT;
+
+	/* Reserved for future use. */
+	if (info.flags || info.reserved)
+		return -EINVAL;
+
+	/* Changing the type of the system call stop is not supported yet. */
+	if (ptrace_get_syscall_info_op(child) != info.op)
+		return -EINVAL;
+
+	switch (info.op) {
+	case PTRACE_SYSCALL_INFO_ENTRY:
+		return ptrace_set_syscall_info_entry(child, regs, &info);
+	case PTRACE_SYSCALL_INFO_EXIT:
+		return ptrace_set_syscall_info_exit(child, regs, &info);
+	case PTRACE_SYSCALL_INFO_SECCOMP:
+		return ptrace_set_syscall_info_seccomp(child, regs, &info);
+	default:
+		/* Other types of system call stops are not supported yet. */
+		return -EINVAL;
+	}
+}
+#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
 
 int ptrace_request(struct task_struct *child, long request,
 		   unsigned long addr, unsigned long data)
 {
 	bool seized = child->ptrace & PT_SEIZED;
 	int ret = -EIO;
-	siginfo_t siginfo, *si;
+	kernel_siginfo_t siginfo, *si;
 	void __user *datavp = (void __user *) data;
 	unsigned long __user *datalp = datavp;
 	unsigned long flags;
@@ -839,24 +1172,31 @@ int ptrace_request(struct task_struct *child, long request,
 		break;
 
 	case PTRACE_SETSIGINFO:
-		if (copy_from_user(&siginfo, datavp, sizeof siginfo))
-			ret = -EFAULT;
-		else
+		ret = copy_siginfo_from_user(&siginfo, datavp);
+		if (!ret)
 			ret = ptrace_setsiginfo(child, &siginfo);
 		break;
 
-	case PTRACE_GETSIGMASK:
+	case PTRACE_GETSIGMASK: {
+		sigset_t *mask;
+
 		if (addr != sizeof(sigset_t)) {
 			ret = -EINVAL;
 			break;
 		}
 
-		if (copy_to_user(datavp, &child->blocked, sizeof(sigset_t)))
+		if (test_tsk_restore_sigmask(child))
+			mask = &child->saved_sigmask;
+		else
+			mask = &child->blocked;
+
+		if (copy_to_user(datavp, mask, sizeof(sigset_t)))
 			ret = -EFAULT;
 		else
 			ret = 0;
 
 		break;
+	}
 
 	case PTRACE_SETSIGMASK: {
 		sigset_t new_set;
@@ -882,6 +1222,8 @@ int ptrace_request(struct task_struct *child, long request,
 		child->blocked = new_set;
 		spin_unlock_irq(&child->sighand->siglock);
 
+		clear_tsk_restore_sigmask(child);
+
 		ret = 0;
 		break;
 	}
@@ -969,9 +1311,7 @@ int ptrace_request(struct task_struct *child, long request,
 	}
 #endif
 
-#ifdef PTRACE_SINGLESTEP
 	case PTRACE_SINGLESTEP:
-#endif
 #ifdef PTRACE_SINGLEBLOCK
 	case PTRACE_SINGLEBLOCK:
 #endif
@@ -984,9 +1324,8 @@ int ptrace_request(struct task_struct *child, long request,
 		return ptrace_resume(child, request, data);
 
 	case PTRACE_KILL:
-		if (child->exit_state)	/* already dead */
-			return 0;
-		return ptrace_resume(child, request, SIGKILL);
+		send_sig_info(SIGKILL, SEND_SIG_NOINFO, child);
+		return 0;
 
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
 	case PTRACE_GETREGSET:
@@ -994,7 +1333,7 @@ int ptrace_request(struct task_struct *child, long request,
 		struct iovec kiov;
 		struct iovec __user *uiov = datavp;
 
-		if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov)))
+		if (!access_ok(uiov, sizeof(*uiov)))
 			return -EFAULT;
 
 		if (__get_user(kiov.iov_base, &uiov->iov_base) ||
@@ -1006,32 +1345,44 @@ int ptrace_request(struct task_struct *child, long request,
 			ret = __put_user(kiov.iov_len, &uiov->iov_len);
 		break;
 	}
+
+	case PTRACE_GET_SYSCALL_INFO:
+		ret = ptrace_get_syscall_info(child, addr, datavp);
+		break;
+
+	case PTRACE_SET_SYSCALL_INFO:
+		ret = ptrace_set_syscall_info(child, addr, datavp);
+		break;
 #endif
-	default:
+
+	case PTRACE_SECCOMP_GET_FILTER:
+		ret = seccomp_get_filter(child, addr, datavp);
 		break;
-	}
 
-	return ret;
-}
+	case PTRACE_SECCOMP_GET_METADATA:
+		ret = seccomp_get_metadata(child, addr, datavp);
+		break;
 
-static struct task_struct *ptrace_get_task_struct(pid_t pid)
-{
-	struct task_struct *child;
+#ifdef CONFIG_RSEQ
+	case PTRACE_GET_RSEQ_CONFIGURATION:
+		ret = ptrace_get_rseq_configuration(child, addr, datavp);
+		break;
+#endif
 
-	rcu_read_lock();
-	child = find_task_by_vpid(pid);
-	if (child)
-		get_task_struct(child);
-	rcu_read_unlock();
+	case PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG:
+		ret = syscall_user_dispatch_set_config(child, addr, datavp);
+		break;
 
-	if (!child)
-		return ERR_PTR(-ESRCH);
-	return child;
-}
+	case PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG:
+		ret = syscall_user_dispatch_get_config(child, addr, datavp);
+		break;
 
-#ifndef arch_ptrace_attach
-#define arch_ptrace_attach(child)	do { } while (0)
-#endif
+	default:
+		break;
+	}
+
+	return ret;
+}
 
 SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
 		unsigned long, data)
@@ -1041,25 +1392,17 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
 
 	if (request == PTRACE_TRACEME) {
 		ret = ptrace_traceme();
-		if (!ret)
-			arch_ptrace_attach(current);
 		goto out;
 	}
 
-	child = ptrace_get_task_struct(pid);
-	if (IS_ERR(child)) {
-		ret = PTR_ERR(child);
+	child = find_get_task_by_vpid(pid);
+	if (!child) {
+		ret = -ESRCH;
 		goto out;
 	}
 
 	if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
 		ret = ptrace_attach(child, request, addr, data);
-		/*
-		 * Some architectures need to do book-keeping after
-		 * a ptrace attach.
-		 */
-		if (!ret)
-			arch_ptrace_attach(child);
 		goto out_put_task_struct;
 	}
 
@@ -1084,7 +1427,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
 	unsigned long tmp;
 	int copied;
 
-	copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), 0);
+	copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
 	if (copied != sizeof(tmp))
 		return -EIO;
 	return put_user(tmp, (unsigned long __user *)data);
@@ -1095,25 +1438,26 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
 {
 	int copied;
 
-	copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
+	copied = ptrace_access_vm(tsk, addr, &data, sizeof(data),
+				  FOLL_FORCE | FOLL_WRITE);
 	return (copied == sizeof(data)) ? 0 : -EIO;
 }
 
 #if defined CONFIG_COMPAT
-#include <linux/compat.h>
 
 int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 			  compat_ulong_t addr, compat_ulong_t data)
 {
 	compat_ulong_t __user *datap = compat_ptr(data);
 	compat_ulong_t word;
-	siginfo_t siginfo;
+	kernel_siginfo_t siginfo;
 	int ret;
 
 	switch (request) {
 	case PTRACE_PEEKTEXT:
 	case PTRACE_PEEKDATA:
-		ret = access_process_vm(child, addr, &word, sizeof(word), 0);
+		ret = ptrace_access_vm(child, addr, &word, sizeof(word),
+				       FOLL_FORCE);
 		if (ret != sizeof(word))
 			ret = -EIO;
 		else
@@ -1122,7 +1466,8 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 
 	case PTRACE_POKETEXT:
 	case PTRACE_POKEDATA:
-		ret = access_process_vm(child, addr, &data, sizeof(data), 1);
+		ret = ptrace_access_vm(child, addr, &data, sizeof(data),
+				       FOLL_FORCE | FOLL_WRITE);
 		ret = (ret != sizeof(data) ? -EIO : 0);
 		break;
 
@@ -1139,11 +1484,9 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 		break;
 
 	case PTRACE_SETSIGINFO:
-		memset(&siginfo, 0, sizeof siginfo);
-		if (copy_siginfo_from_user32(
-			    &siginfo, (struct compat_siginfo __user *) datap))
-			ret = -EFAULT;
-		else
+		ret = copy_siginfo_from_user32(
+			&siginfo, (struct compat_siginfo __user *) datap);
+		if (!ret)
 			ret = ptrace_setsiginfo(child, &siginfo);
 		break;
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
@@ -1156,7 +1499,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 		compat_uptr_t ptr;
 		compat_size_t len;
 
-		if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov)))
+		if (!access_ok(uiov, sizeof(*uiov)))
 			return -EFAULT;
 
 		if (__get_user(ptr, &uiov->iov_base) ||
@@ -1180,8 +1523,8 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 	return ret;
 }
 
-asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
-				  compat_long_t addr, compat_long_t data)
+COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid,
+		       compat_long_t, addr, compat_long_t, data)
 {
 	struct task_struct *child;
 	long ret;
@@ -1191,20 +1534,14 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
 		goto out;
 	}
 
-	child = ptrace_get_task_struct(pid);
-	if (IS_ERR(child)) {
-		ret = PTR_ERR(child);
+	child = find_get_task_by_vpid(pid);
+	if (!child) {
+		ret = -ESRCH;
 		goto out;
 	}
 
 	if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
 		ret = ptrace_attach(child, request, addr, data);
 		goto out_put_task_struct;
 	}
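
Two of the hunks above add the PTRACE_GET_SYSCALL_INFO and PTRACE_SET_SYSCALL_INFO requests. For orientation, here is a minimal tracer-side sketch of the read path — not part of the patch; it assumes a libc and kernel headers recent enough to expose PTRACE_GET_SYSCALL_INFO, and it elides most error handling:

```c
/*
 * Sketch only: trace a child's syscall stops with PTRACE_GET_SYSCALL_INFO.
 * Assumes headers that provide struct ptrace_syscall_info (Linux >= 5.3).
 */
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/ptrace.h>		/* keep before <linux/ptrace.h> on glibc */
#include <sys/wait.h>
#include <linux/ptrace.h>	/* struct ptrace_syscall_info */

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {				/* tracee */
		ptrace(PTRACE_TRACEME, 0, 0, 0);
		raise(SIGSTOP);			/* hand control to the tracer */
		execlp("true", "true", (char *)NULL);
		_exit(127);
	}

	waitpid(pid, NULL, 0);			/* initial SIGSTOP */
	ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_TRACESYSGOOD);

	for (;;) {
		int status;
		struct ptrace_syscall_info info;

		ptrace(PTRACE_SYSCALL, pid, 0, 0);	/* run to next stop */
		waitpid(pid, &status, 0);
		if (WIFEXITED(status) || WIFSIGNALED(status))
			break;
		if (!WIFSTOPPED(status) ||
		    WSTOPSIG(status) != (SIGTRAP | 0x80))
			continue;		/* not a syscall stop */

		/* addr carries the size of the buffer we hand to the kernel */
		if (ptrace(PTRACE_GET_SYSCALL_INFO, pid,
			   (void *)sizeof(info), &info) < 0)
			continue;
		if (info.op == PTRACE_SYSCALL_INFO_ENTRY)
			printf("syscall %llu\n",
			       (unsigned long long)info.entry.nr);
		else if (info.op == PTRACE_SYSCALL_INFO_EXIT)
			printf(" = %lld\n", (long long)info.exit.rval);
	}
	return 0;
}
```

Note how this matches the kernel side shown in the diff: the addr argument is the user buffer size, the kernel writes at most min(actual_size, user_size) bytes, and the return value is the size the kernel would have needed — which is how userspace can probe for newer, larger versions of the structure.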

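The comment added in ptrace_attach() preserves a historical quirk worth illustrating: unknown option bits are rejected with -EIO by PTRACE_SEIZE but with -EINVAL by PTRACE_SETOPTIONS, even though both now funnel through check_ptrace_options(). A small probe, again only a sketch — the "unknown" bit below is an assumption, chosen to lie outside PTRACE_O_MASK:

```c
/* Sketch: observe the PTRACE_SEIZE vs PTRACE_SETOPTIONS error-code split. */
#include <errno.h>
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

#define UNKNOWN_OPT (1UL << 30)	/* assumed to stay outside PTRACE_O_MASK */

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		pause();		/* sleep until killed */
		_exit(0);
	}

	/* Unknown option bits make PTRACE_SEIZE fail with EIO... */
	if (ptrace(PTRACE_SEIZE, pid, 0, UNKNOWN_OPT) < 0)
		printf("PTRACE_SEIZE: errno=%d (EIO=%d)\n", errno, EIO);

	/* ...so attach cleanly, stop the tracee, and probe PTRACE_SETOPTIONS. */
	ptrace(PTRACE_SEIZE, pid, 0, 0);
	ptrace(PTRACE_INTERRUPT, pid, 0, 0);
	waitpid(pid, NULL, 0);
	if (ptrace(PTRACE_SETOPTIONS, pid, 0, UNKNOWN_OPT) < 0)
		printf("PTRACE_SETOPTIONS: errno=%d (EINVAL=%d)\n",
		       errno, EINVAL);

	kill(pid, SIGKILL);
	waitpid(pid, NULL, 0);
	return 0;
}
```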