diff options
Diffstat (limited to 'kernel/seccomp.c')
-rw-r--r-- | kernel/seccomp.c | 294 |
1 files changed, 207 insertions, 87 deletions
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 952dc1c90229..aca7b437882e 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -29,6 +29,9 @@ #include <linux/syscalls.h> #include <linux/sysctl.h> +/* Not exposed in headers: strictly internal use only. */ +#define SECCOMP_MODE_DEAD (SECCOMP_MODE_FILTER + 1) + #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER #include <asm/syscall.h> #endif @@ -39,7 +42,6 @@ #include <linux/pid.h> #include <linux/ptrace.h> #include <linux/capability.h> -#include <linux/tracehook.h> #include <linux/uaccess.h> #include <linux/anon_inodes.h> #include <linux/lockdep.h> @@ -107,20 +109,27 @@ struct seccomp_knotif { * installing process should allocate the fd as normal. * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC * is allowed. + * @ioctl_flags: The flags used for the seccomp_addfd ioctl. + * @setfd: whether or not SECCOMP_ADDFD_FLAG_SETFD was set during notify_addfd * @ret: The return value of the installing process. It is set to the fd num * upon success (>= 0). * @completion: Indicates that the installing process has completed fd * installation, or gone away (either due to successful * reply, or signal) + * @list: list_head for chaining seccomp_kaddfd together. * */ struct seccomp_kaddfd { struct file *file; int fd; unsigned int flags; + __u32 ioctl_flags; - /* To only be set on reply */ - int ret; + union { + bool setfd; + /* To only be set on reply */ + int ret; + }; struct completion completion; struct list_head list; }; @@ -131,14 +140,17 @@ struct seccomp_kaddfd { * structure is fairly large, we store the notification-specific stuff in a * separate structure. * - * @request: A semaphore that users of this notification can wait on for - * changes. Actual reads and writes are still controlled with - * filter->notify_lock. + * @requests: A semaphore that users of this notification can wait on for + * changes. Actual reads and writes are still controlled with + * filter->notify_lock. + * @flags: A set of SECCOMP_USER_NOTIF_FD_* flags. * @next_id: The id of the next request. * @notifications: A list of struct seccomp_knotif elements. */ + struct notification { - struct semaphore request; + atomic_t requests; + u32 flags; u64 next_id; struct list_head notifications; }; @@ -193,6 +205,8 @@ static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter) * the filter can be freed. * @cache: cache of arch/syscall mappings to actions * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged + * @wait_killable_recv: Put notifying process in killable state once the + * notification is received by the userspace listener. * @prev: points to a previously installed, or inherited, filter * @prog: the BPF program to evaluate * @notif: the struct that holds all notification related information @@ -213,6 +227,7 @@ struct seccomp_filter { refcount_t refs; refcount_t users; bool log; + bool wait_killable_recv; struct action_cache cache; struct seccomp_filter *prev; struct bpf_prog *prog; @@ -378,6 +393,7 @@ static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilte } #endif /* SECCOMP_ARCH_NATIVE */ +#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL))) /** * seccomp_run_filters - evaluates all seccomp filters against @sd * @sd: optional seccomp data to be passed to filters @@ -387,7 +403,6 @@ static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilte * * Returns valid seccomp BPF response codes. */ -#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL))) static u32 seccomp_run_filters(const struct seccomp_data *sd, struct seccomp_filter **match) { @@ -545,6 +560,8 @@ static void __seccomp_filter_release(struct seccomp_filter *orig) * drop its reference count, and notify * about unused filters * + * @tsk: task the filter should be released from. + * * This function should only be called when the task is exiting as * it detaches it from its filter tree. As such, READ_ONCE() and * barriers are not needed here, as would normally be needed. @@ -564,6 +581,8 @@ void seccomp_filter_release(struct task_struct *tsk) /** * seccomp_sync_threads: sets all threads to use current's filter * + * @flags: SECCOMP_FILTER_FLAG_* flags to set during sync. + * * Expects sighand and cred_guard_mutex locks to be held, and for * seccomp_can_sync_threads() to have returned success already * without dropping the locks. @@ -597,7 +616,7 @@ static inline void seccomp_sync_threads(unsigned long flags) smp_store_release(&thread->seccomp.filter, caller->seccomp.filter); atomic_set(&thread->seccomp.filter_count, - atomic_read(&thread->seccomp.filter_count)); + atomic_read(&caller->seccomp.filter_count)); /* * Don't let an unprivileged task work around @@ -817,7 +836,7 @@ static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter, } /** - * seccomp_cache_prepare - emulate the filter to find cachable syscalls + * seccomp_cache_prepare - emulate the filter to find cacheable syscalls * @sfilter: The seccomp filter * * Returns 0 if successful or -errno if error occurred. @@ -886,6 +905,10 @@ static long seccomp_attach_filter(unsigned int flags, if (flags & SECCOMP_FILTER_FLAG_LOG) filter->log = true; + /* Set wait killable flag, if present. */ + if (flags & SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV) + filter->wait_killable_recv = true; + /* * If there is an existing filter, make it the prev and don't drop its * task reference. @@ -917,30 +940,6 @@ void get_seccomp_filter(struct task_struct *tsk) refcount_inc(&orig->users); } -static void seccomp_init_siginfo(kernel_siginfo_t *info, int syscall, int reason) -{ - clear_siginfo(info); - info->si_signo = SIGSYS; - info->si_code = SYS_SECCOMP; - info->si_call_addr = (void __user *)KSTK_EIP(current); - info->si_errno = reason; - info->si_arch = syscall_get_arch(current); - info->si_syscall = syscall; -} - -/** - * seccomp_send_sigsys - signals the task to allow in-process syscall emulation - * @syscall: syscall number to send to userland - * @reason: filter-supplied reason code to send to userland (via si_errno) - * - * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info. - */ -static void seccomp_send_sigsys(int syscall, int reason) -{ - struct kernel_siginfo info; - seccomp_init_siginfo(&info, syscall, reason); - force_sig_info(&info); -} #endif /* CONFIG_SECCOMP_FILTER */ /* For use with seccomp_actions_logged */ @@ -1029,6 +1028,7 @@ static void __secure_computing_strict(int this_syscall) #ifdef SECCOMP_DEBUG dump_stack(); #endif + current->seccomp.mode = SECCOMP_MODE_DEAD; seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true); do_exit(SIGKILL); } @@ -1062,17 +1062,46 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter) return filter->notif->next_id++; } -static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd) +static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n) { + int fd; + /* * Remove the notification, and reset the list pointers, indicating * that it has been handled. */ list_del_init(&addfd->list); - addfd->ret = receive_fd_replace(addfd->fd, addfd->file, addfd->flags); + if (!addfd->setfd) + fd = receive_fd(addfd->file, NULL, addfd->flags); + else + fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags); + addfd->ret = fd; + + if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) { + /* If we fail reset and return an error to the notifier */ + if (fd < 0) { + n->state = SECCOMP_NOTIFY_SENT; + } else { + /* Return the FD we just added */ + n->flags = 0; + n->error = 0; + n->val = fd; + } + } + + /* + * Mark the notification as completed. From this point, addfd mem + * might be invalidated and we can't safely read it anymore. + */ complete(&addfd->completion); } +static bool should_sleep_killable(struct seccomp_filter *match, + struct seccomp_knotif *n) +{ + return match->wait_killable_recv && n->state == SECCOMP_NOTIFY_SENT; +} + static int seccomp_do_user_notification(int this_syscall, struct seccomp_filter *match, const struct seccomp_data *sd) @@ -1093,33 +1122,52 @@ static int seccomp_do_user_notification(int this_syscall, n.data = sd; n.id = seccomp_next_notify_id(match); init_completion(&n.ready); - list_add(&n.list, &match->notif->notifications); + list_add_tail(&n.list, &match->notif->notifications); INIT_LIST_HEAD(&n.addfd); - up(&match->notif->request); - wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM); - mutex_unlock(&match->notify_lock); + atomic_inc(&match->notif->requests); + if (match->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP) + wake_up_poll_on_current_cpu(&match->wqh, EPOLLIN | EPOLLRDNORM); + else + wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM); /* * This is where we wait for a reply from userspace. */ -wait: - err = wait_for_completion_interruptible(&n.ready); - mutex_lock(&match->notify_lock); - if (err == 0) { - /* Check if we were woken up by a addfd message */ + do { + bool wait_killable = should_sleep_killable(match, &n); + + mutex_unlock(&match->notify_lock); + if (wait_killable) + err = wait_for_completion_killable(&n.ready); + else + err = wait_for_completion_interruptible(&n.ready); + mutex_lock(&match->notify_lock); + + if (err != 0) { + /* + * Check to see if the notifcation got picked up and + * whether we should switch to wait killable. + */ + if (!wait_killable && should_sleep_killable(match, &n)) + continue; + + goto interrupted; + } + addfd = list_first_entry_or_null(&n.addfd, struct seccomp_kaddfd, list); - if (addfd && n.state != SECCOMP_NOTIFY_REPLIED) { - seccomp_handle_addfd(addfd); - mutex_unlock(&match->notify_lock); - goto wait; - } - ret = n.val; - err = n.error; - flags = n.flags; - } + /* Check if we were woken up by a addfd message */ + if (addfd) + seccomp_handle_addfd(addfd, &n); + + } while (n.state != SECCOMP_NOTIFY_REPLIED); + ret = n.val; + err = n.error; + flags = n.flags; + +interrupted: /* If there were any pending addfd calls, clear them out */ list_for_each_entry_safe(addfd, tmp, &n.addfd, list) { /* The process went away before we got a chance to handle it */ @@ -1164,7 +1212,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, * Make sure that any changes to mode from another thread have * been seen after SYSCALL_WORK_SECCOMP was seen. */ - rmb(); + smp_rmb(); if (!sd) { populate_seccomp_data(&sd_local); @@ -1188,7 +1236,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, /* Show the handler the original registers. */ syscall_rollback(current, current_pt_regs()); /* Let the filter pass back 16 bits of data. */ - seccomp_send_sigsys(this_syscall, data); + force_sig_seccomp(this_syscall, data, false); goto skip; case SECCOMP_RET_TRACE: @@ -1255,22 +1303,19 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, case SECCOMP_RET_KILL_THREAD: case SECCOMP_RET_KILL_PROCESS: default: + current->seccomp.mode = SECCOMP_MODE_DEAD; seccomp_log(this_syscall, SIGSYS, action, true); /* Dump core only if this is the last remaining thread. */ if (action != SECCOMP_RET_KILL_THREAD || - get_nr_threads(current) == 1) { - kernel_siginfo_t info; - + (atomic_read(¤t->signal->live) == 1)) { /* Show the original registers in the dump. */ syscall_rollback(current, current_pt_regs()); - /* Trigger a manual coredump since do_exit skips it. */ - seccomp_init_siginfo(&info, this_syscall, data); - do_coredump(&info); - } - if (action == SECCOMP_RET_KILL_THREAD) + /* Trigger a coredump with SIGSYS */ + force_sig_seccomp(this_syscall, data, true); + } else { do_exit(SIGSYS); - else - do_group_exit(SIGSYS); + } + return -1; /* skip the syscall go directly to signal handling */ } unreachable(); @@ -1284,6 +1329,8 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, const bool recheck_after_trace) { BUG(); + + return -1; } #endif @@ -1305,6 +1352,11 @@ int __secure_computing(const struct seccomp_data *sd) return 0; case SECCOMP_MODE_FILTER: return __seccomp_filter(this_syscall, sd, false); + /* Surviving SECCOMP_RET_KILL_* must be proactively impossible. */ + case SECCOMP_MODE_DEAD: + WARN_ON_ONCE(1); + do_exit(SIGKILL); + return -1; default: BUG(); } @@ -1410,6 +1462,37 @@ find_notification(struct seccomp_filter *filter, u64 id) return NULL; } +static int recv_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync, + void *key) +{ + /* Avoid a wakeup if event not interesting for us. */ + if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR))) + return 0; + return autoremove_wake_function(wait, mode, sync, key); +} + +static int recv_wait_event(struct seccomp_filter *filter) +{ + DEFINE_WAIT_FUNC(wait, recv_wake_function); + int ret; + + if (atomic_dec_if_positive(&filter->notif->requests) >= 0) + return 0; + + for (;;) { + ret = prepare_to_wait_event(&filter->wqh, &wait, TASK_INTERRUPTIBLE); + + if (atomic_dec_if_positive(&filter->notif->requests) >= 0) + break; + + if (ret) + return ret; + + schedule(); + } + finish_wait(&filter->wqh, &wait); + return 0; +} static long seccomp_notify_recv(struct seccomp_filter *filter, void __user *buf) @@ -1427,7 +1510,7 @@ static long seccomp_notify_recv(struct seccomp_filter *filter, memset(&unotif, 0, sizeof(unotif)); - ret = down_interruptible(&filter->notif->request); + ret = recv_wait_event(filter); if (ret < 0) return ret; @@ -1471,8 +1554,12 @@ out: mutex_lock(&filter->notify_lock); knotif = find_notification(filter, unotif.id); if (knotif) { + /* Reset the process to make sure it's not stuck */ + if (should_sleep_killable(filter, knotif)) + complete(&knotif->ready); knotif->state = SECCOMP_NOTIFY_INIT; - up(&filter->notif->request); + atomic_inc(&filter->notif->requests); + wake_up_poll(&filter->wqh, EPOLLIN | EPOLLRDNORM); } mutex_unlock(&filter->notify_lock); } @@ -1518,7 +1605,10 @@ static long seccomp_notify_send(struct seccomp_filter *filter, knotif->error = resp.error; knotif->val = resp.val; knotif->flags = resp.flags; - complete(&knotif->ready); + if (filter->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP) + complete_on_current_cpu(&knotif->ready); + else + complete(&knotif->ready); out: mutex_unlock(&filter->notify_lock); return ret; @@ -1548,6 +1638,22 @@ static long seccomp_notify_id_valid(struct seccomp_filter *filter, return ret; } +static long seccomp_notify_set_flags(struct seccomp_filter *filter, + unsigned long flags) +{ + long ret; + + if (flags & ~SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP) + return -EINVAL; + + ret = mutex_lock_interruptible(&filter->notify_lock); + if (ret < 0) + return ret; + filter->notif->flags = flags; + mutex_unlock(&filter->notify_lock); + return 0; +} + static long seccomp_notify_addfd(struct seccomp_filter *filter, struct seccomp_notif_addfd __user *uaddfd, unsigned int size) @@ -1570,7 +1676,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter, if (addfd.newfd_flags & ~O_CLOEXEC) return -EINVAL; - if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD) + if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND)) return -EINVAL; if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD)) @@ -1580,9 +1686,10 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter, if (!kaddfd.file) return -EBADF; + kaddfd.ioctl_flags = addfd.flags; kaddfd.flags = addfd.newfd_flags; - kaddfd.fd = (addfd.flags & SECCOMP_ADDFD_FLAG_SETFD) ? - addfd.newfd : -1; + kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD; + kaddfd.fd = addfd.newfd; init_completion(&kaddfd.completion); ret = mutex_lock_interruptible(&filter->notify_lock); @@ -1605,6 +1712,23 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter, goto out_unlock; } + if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) { + /* + * Disallow queuing an atomic addfd + send reply while there are + * some addfd requests still to process. + * + * There is no clear reason to support it and allows us to keep + * the loop on the other side straight-forward. + */ + if (!list_empty(&knotif->addfd)) { + ret = -EBUSY; + goto out_unlock; + } + + /* Allow exactly only one reply */ + knotif->state = SECCOMP_NOTIFY_REPLIED; + } + list_add(&kaddfd.list, &knotif->addfd); complete(&knotif->ready); mutex_unlock(&filter->notify_lock); @@ -1659,6 +1783,8 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd, case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR: case SECCOMP_IOCTL_NOTIF_ID_VALID: return seccomp_notify_id_valid(filter, buf); + case SECCOMP_IOCTL_NOTIF_SET_FLAGS: + return seccomp_notify_set_flags(filter, arg); } /* Extensible Argument ioctls */ @@ -1716,7 +1842,6 @@ static struct file *init_listener(struct seccomp_filter *filter) if (!filter->notif) goto out; - sema_init(&filter->notif->request, 0); filter->notif->next_id = get_random_u64(); INIT_LIST_HEAD(&filter->notif->notifications); @@ -1798,6 +1923,14 @@ static long seccomp_set_mode_filter(unsigned int flags, ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0)) return -EINVAL; + /* + * The SECCOMP_FILTER_FLAG_WAIT_KILLABLE_SENT flag doesn't make sense + * without the SECCOMP_FILTER_FLAG_NEW_LISTENER flag. + */ + if ((flags & SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV) && + ((flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) == 0)) + return -EINVAL; + /* Prepare the new filter before holding any locks. */ prepared = seccomp_prepare_user_filter(filter); if (IS_ERR(prepared)) @@ -2299,12 +2432,6 @@ static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write, return ret; } -static struct ctl_path seccomp_sysctl_path[] = { - { .procname = "kernel", }, - { .procname = "seccomp", }, - { } -}; - static struct ctl_table seccomp_sysctl_table[] = { { .procname = "actions_avail", @@ -2323,14 +2450,7 @@ static struct ctl_table seccomp_sysctl_table[] = { static int __init seccomp_sysctl_init(void) { - struct ctl_table_header *hdr; - - hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table); - if (!hdr) - pr_warn("sysctl registration failed\n"); - else - kmemleak_not_leak(hdr); - + register_sysctl_init("kernel/seccomp", seccomp_sysctl_table); return 0; } |