summaryrefslogtreecommitdiff
path: root/kernel/fork.c
diff options
context:
space:
mode:
authorOlof Johansson <olof@lixom.net>2019-12-05 13:16:58 -0800
committerOlof Johansson <olof@lixom.net>2019-12-05 13:18:54 -0800
commit942e6f8a8314e5550e254519dfba4ccd5170421d (patch)
tree75ec655b440fbc1c454247af38b5596dd8c78de9 /kernel/fork.c
parent336bab731be76a90291697e51d2aed0ad67d7cb5 (diff)
parentb08baef02b26cf7c2123e4a24a2fa1fb7a593ffb (diff)
Merge mainline/master into arm/fixes
This brings in the mainline tree right after armsoc contents was merged this release cycle, so that we can re-run savedefconfig, etc. Signed-off-by: Olof Johansson <olof@lixom.net>
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--kernel/fork.c186
1 files changed, 147 insertions, 39 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index bcdf53125210..2508a4f238a3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,7 +40,6 @@
#include <linux/binfmts.h>
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
-#include <linux/hmm.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
@@ -94,6 +93,7 @@
#include <linux/livepatch.h>
#include <linux/thread_info.h>
#include <linux/stackleak.h>
+#include <linux/kasan.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -224,6 +224,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
if (!s)
continue;
+ /* Clear the KASAN shadow of the stack. */
+ kasan_unpoison_shadow(s->addr, THREAD_SIZE);
+
/* Clear stale pointers from reused stack. */
memset(s->addr, 0, THREAD_SIZE);
@@ -1283,24 +1286,8 @@ static int wait_for_vfork_done(struct task_struct *child,
* restoring the old one. . .
* Eric Biederman 10 January 1998
*/
-void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
- /* Get rid of any futexes when releasing the mm */
-#ifdef CONFIG_FUTEX
- if (unlikely(tsk->robust_list)) {
- exit_robust_list(tsk);
- tsk->robust_list = NULL;
- }
-#ifdef CONFIG_COMPAT
- if (unlikely(tsk->compat_robust_list)) {
- compat_exit_robust_list(tsk);
- tsk->compat_robust_list = NULL;
- }
-#endif
- if (unlikely(!list_empty(&tsk->pi_state_list)))
- exit_pi_state_list(tsk);
-#endif
-
uprobe_free_utask(tsk);
/* Get rid of any cached register state */
@@ -1333,6 +1320,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
complete_vfork_done(tsk);
}
+void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+ futex_exit_release(tsk);
+ mm_release(tsk, mm);
+}
+
+void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+ futex_exec_release(tsk);
+ mm_release(tsk, mm);
+}
+
/**
* dup_mm() - duplicates an existing mm structure
* @tsk: the task_struct with which the new mm will be associated.
@@ -1517,6 +1516,11 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
spin_lock_irq(&current->sighand->siglock);
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
spin_unlock_irq(&current->sighand->siglock);
+
+ /* Reset all signal handler not set to SIG_IGN to SIG_DFL. */
+ if (clone_flags & CLONE_CLEAR_SIGHAND)
+ flush_signal_handlers(tsk, 0);
+
return 0;
}
@@ -1695,12 +1699,68 @@ static int pidfd_release(struct inode *inode, struct file *file)
}
#ifdef CONFIG_PROC_FS
+/**
+ * pidfd_show_fdinfo - print information about a pidfd
+ * @m: proc fdinfo file
+ * @f: file referencing a pidfd
+ *
+ * Pid:
+ * This function will print the pid that a given pidfd refers to in the
+ * pid namespace of the procfs instance.
+ * If the pid namespace of the process is not a descendant of the pid
+ * namespace of the procfs instance 0 will be shown as its pid. This is
+ * similar to calling getppid() on a process whose parent is outside of
+ * its pid namespace.
+ *
+ * NSpid:
+ * If pid namespaces are supported then this function will also print
+ * the pid of a given pidfd refers to for all descendant pid namespaces
+ * starting from the current pid namespace of the instance, i.e. the
+ * Pid field and the first entry in the NSpid field will be identical.
+ * If the pid namespace of the process is not a descendant of the pid
+ * namespace of the procfs instance 0 will be shown as its first NSpid
+ * entry and no others will be shown.
+ * Note that this differs from the Pid and NSpid fields in
+ * /proc/<pid>/status where Pid and NSpid are always shown relative to
+ * the pid namespace of the procfs instance. The difference becomes
+ * obvious when sending around a pidfd between pid namespaces from a
+ * different branch of the tree, i.e. where no ancestoral relation is
+ * present between the pid namespaces:
+ * - create two new pid namespaces ns1 and ns2 in the initial pid
+ * namespace (also take care to create new mount namespaces in the
+ * new pid namespace and mount procfs)
+ * - create a process with a pidfd in ns1
+ * - send pidfd from ns1 to ns2
+ * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid
+ * have exactly one entry, which is 0
+ */
static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
{
- struct pid_namespace *ns = proc_pid_ns(file_inode(m->file));
struct pid *pid = f->private_data;
+ struct pid_namespace *ns;
+ pid_t nr = -1;
+
+ if (likely(pid_has_task(pid, PIDTYPE_PID))) {
+ ns = proc_pid_ns(file_inode(m->file));
+ nr = pid_nr_ns(pid, ns);
+ }
+
+ seq_put_decimal_ll(m, "Pid:\t", nr);
+
+#ifdef CONFIG_PID_NS
+ seq_put_decimal_ll(m, "\nNSpid:\t", nr);
+ if (nr > 0) {
+ int i;
- seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns));
+ /* If nr is non-zero it means that 'pid' is valid and that
+ * ns, i.e. the pid namespace associated with the procfs
+ * instance, is in the pid namespace hierarchy of pid.
+ * Start at one below the already printed level.
+ */
+ for (i = ns->level + 1; i <= pid->level; i++)
+ seq_put_decimal_ll(m, "\t", pid->numbers[i].nr);
+ }
+#endif
seq_putc(m, '\n');
}
#endif
@@ -1708,11 +1768,11 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
/*
* Poll support for process exit notification.
*/
-static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts)
+static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
{
struct task_struct *task;
struct pid *pid = file->private_data;
- int poll_flags = 0;
+ __poll_t poll_flags = 0;
poll_wait(file, &pid->wait_pidfd, pts);
@@ -1724,7 +1784,7 @@ static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts)
* group, then poll(2) should block, similar to the wait(2) family.
*/
if (!task || (task->exit_state && thread_group_empty(task)))
- poll_flags = POLLIN | POLLRDNORM;
+ poll_flags = EPOLLIN | EPOLLRDNORM;
rcu_read_unlock();
return poll_flags;
@@ -2026,7 +2086,8 @@ static __latent_entropy struct task_struct *copy_process(
stackleak_task_init(p);
if (pid != &init_struct_pid) {
- pid = alloc_pid(p->nsproxy->pid_ns_for_children);
+ pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid,
+ args->set_tid_size);
if (IS_ERR(pid)) {
retval = PTR_ERR(pid);
goto bad_fork_cleanup_thread;
@@ -2062,14 +2123,8 @@ static __latent_entropy struct task_struct *copy_process(
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif
-#ifdef CONFIG_FUTEX
- p->robust_list = NULL;
-#ifdef CONFIG_COMPAT
- p->compat_robust_list = NULL;
-#endif
- INIT_LIST_HEAD(&p->pi_state_list);
- p->pi_state_cache = NULL;
-#endif
+ futex_init_task(p);
+
/*
* sigaltstack should be cleared when sharing the same VM
*/
@@ -2130,7 +2185,7 @@ static __latent_entropy struct task_struct *copy_process(
*/
p->start_time = ktime_get_ns();
- p->real_start_time = ktime_get_boottime_ns();
+ p->start_boottime = ktime_get_boottime_ns();
/*
* Make it visible to the rest of the system, but dont wake it up yet.
@@ -2529,6 +2584,7 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
{
int err;
struct clone_args args;
+ pid_t *kset_tid = kargs->set_tid;
if (unlikely(usize > PAGE_SIZE))
return -E2BIG;
@@ -2539,6 +2595,15 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
if (err)
return err;
+ if (unlikely(args.set_tid_size > MAX_PID_NS_LEVEL))
+ return -EINVAL;
+
+ if (unlikely(!args.set_tid && args.set_tid_size > 0))
+ return -EINVAL;
+
+ if (unlikely(args.set_tid && args.set_tid_size == 0))
+ return -EINVAL;
+
/*
* Verify that higher 32bits of exit_signal are unset and that
* it is a valid signal
@@ -2556,18 +2621,51 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
.stack = args.stack,
.stack_size = args.stack_size,
.tls = args.tls,
+ .set_tid_size = args.set_tid_size,
};
+ if (args.set_tid &&
+ copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid),
+ (kargs->set_tid_size * sizeof(pid_t))))
+ return -EFAULT;
+
+ kargs->set_tid = kset_tid;
+
return 0;
}
-static bool clone3_args_valid(const struct kernel_clone_args *kargs)
+/**
+ * clone3_stack_valid - check and prepare stack
+ * @kargs: kernel clone args
+ *
+ * Verify that the stack arguments userspace gave us are sane.
+ * In addition, set the stack direction for userspace since it's easy for us to
+ * determine.
+ */
+static inline bool clone3_stack_valid(struct kernel_clone_args *kargs)
{
- /*
- * All lower bits of the flag word are taken.
- * Verify that no other unknown flags are passed along.
- */
- if (kargs->flags & ~CLONE_LEGACY_FLAGS)
+ if (kargs->stack == 0) {
+ if (kargs->stack_size > 0)
+ return false;
+ } else {
+ if (kargs->stack_size == 0)
+ return false;
+
+ if (!access_ok((void __user *)kargs->stack, kargs->stack_size))
+ return false;
+
+#if !defined(CONFIG_STACK_GROWSUP) && !defined(CONFIG_IA64)
+ kargs->stack += kargs->stack_size;
+#endif
+ }
+
+ return true;
+}
+
+static bool clone3_args_valid(struct kernel_clone_args *kargs)
+{
+ /* Verify that no unknown flags are passed along. */
+ if (kargs->flags & ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND))
return false;
/*
@@ -2577,10 +2675,17 @@ static bool clone3_args_valid(const struct kernel_clone_args *kargs)
if (kargs->flags & (CLONE_DETACHED | CSIGNAL))
return false;
+ if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) ==
+ (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND))
+ return false;
+
if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) &&
kargs->exit_signal)
return false;
+ if (!clone3_stack_valid(kargs))
+ return false;
+
return true;
}
@@ -2600,6 +2705,9 @@ SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size)
int err;
struct kernel_clone_args kargs;
+ pid_t set_tid[MAX_PID_NS_LEVEL];
+
+ kargs.set_tid = set_tid;
err = copy_clone_args_from_user(&kargs, uargs, size);
if (err)