summaryrefslogtreecommitdiff
path: root/kernel/exit.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/exit.c')
-rw-r--r--kernel/exit.c185
1 files changed, 114 insertions, 71 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index dfb963d2f862..3485e5fc499e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -25,7 +25,6 @@
#include <linux/acct.h>
#include <linux/tsacct_kern.h>
#include <linux/file.h>
-#include <linux/fdtable.h>
#include <linux/freezer.h>
#include <linux/binfmts.h>
#include <linux/nsproxy.h>
@@ -86,7 +85,7 @@
static unsigned int oops_limit = 10000;
#ifdef CONFIG_SYSCTL
-static struct ctl_table kern_exit_table[] = {
+static const struct ctl_table kern_exit_table[] = {
{
.procname = "oops_limit",
.data = &oops_limit,
@@ -94,7 +93,6 @@ static struct ctl_table kern_exit_table[] = {
.mode = 0644,
.proc_handler = proc_douintvec,
},
- { }
};
static __init int kernel_exit_sysctls_init(void)
@@ -278,7 +276,6 @@ repeat:
}
write_unlock_irq(&tasklist_lock);
- seccomp_filter_release(p);
proc_flush_pid(thread_pid);
put_pid(thread_pid);
release_thread(p);
@@ -414,10 +411,7 @@ static void coredump_task_exit(struct task_struct *tsk)
tsk->flags |= PF_POSTCOREDUMP;
core_state = tsk->signal->core_state;
spin_unlock_irq(&tsk->sighand->siglock);
-
- /* The vhost_worker does not particpate in coredumps */
- if (core_state &&
- ((tsk->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER)) {
+ if (core_state) {
struct core_thread self;
self.task = current;
@@ -433,7 +427,7 @@ static void coredump_task_exit(struct task_struct *tsk)
complete(&core_state->startup);
for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
+ set_current_state(TASK_IDLE|TASK_FREEZABLE);
if (!self.task) /* see coredump_finish() */
break;
schedule();
@@ -443,14 +437,46 @@ static void coredump_task_exit(struct task_struct *tsk)
}
#ifdef CONFIG_MEMCG
+/* drops tasklist_lock if succeeds */
+static bool __try_to_set_owner(struct task_struct *tsk, struct mm_struct *mm)
+{
+ bool ret = false;
+
+ task_lock(tsk);
+ if (likely(tsk->mm == mm)) {
+ /* tsk can't pass exit_mm/exec_mmap and exit */
+ read_unlock(&tasklist_lock);
+ WRITE_ONCE(mm->owner, tsk);
+ lru_gen_migrate_mm(mm);
+ ret = true;
+ }
+ task_unlock(tsk);
+ return ret;
+}
+
+static bool try_to_set_owner(struct task_struct *g, struct mm_struct *mm)
+{
+ struct task_struct *t;
+
+ for_each_thread(g, t) {
+ struct mm_struct *t_mm = READ_ONCE(t->mm);
+ if (t_mm == mm) {
+ if (__try_to_set_owner(t, mm))
+ return true;
+ } else if (t_mm)
+ break;
+ }
+
+ return false;
+}
+
/*
* A task is exiting. If it owned this mm, find a new owner for the mm.
*/
void mm_update_next_owner(struct mm_struct *mm)
{
- struct task_struct *c, *g, *p = current;
+ struct task_struct *g, *p = current;
-retry:
/*
* If the exiting or execing task is not the owner, it's
* someone else's problem.
@@ -471,31 +497,27 @@ retry:
/*
* Search in the children
*/
- list_for_each_entry(c, &p->children, sibling) {
- if (c->mm == mm)
- goto assign_new_owner;
+ list_for_each_entry(g, &p->children, sibling) {
+ if (try_to_set_owner(g, mm))
+ goto ret;
}
-
/*
* Search in the siblings
*/
- list_for_each_entry(c, &p->real_parent->children, sibling) {
- if (c->mm == mm)
- goto assign_new_owner;
+ list_for_each_entry(g, &p->real_parent->children, sibling) {
+ if (try_to_set_owner(g, mm))
+ goto ret;
}
-
/*
* Search through everything else, we should not get here often.
*/
for_each_process(g) {
+ if (atomic_read(&mm->mm_users) <= 1)
+ break;
if (g->flags & PF_KTHREAD)
continue;
- for_each_thread(g, c) {
- if (c->mm == mm)
- goto assign_new_owner;
- if (c->mm)
- break;
- }
+ if (try_to_set_owner(g, mm))
+ goto ret;
}
read_unlock(&tasklist_lock);
/*
@@ -504,30 +526,9 @@ retry:
* ptrace or page migration (get_task_mm()). Mark owner as NULL.
*/
WRITE_ONCE(mm->owner, NULL);
+ ret:
return;
-assign_new_owner:
- BUG_ON(c == p);
- get_task_struct(c);
- /*
- * The task_lock protects c->mm from changing.
- * We always want mm->owner->mm == mm
- */
- task_lock(c);
- /*
- * Delay read_unlock() till we have the task_lock()
- * to ensure that c does not slip away underneath us
- */
- read_unlock(&tasklist_lock);
- if (c->mm != mm) {
- task_unlock(c);
- put_task_struct(c);
- goto retry;
- }
- WRITE_ONCE(mm->owner, c);
- lru_gen_migrate_mm(mm);
- task_unlock(c);
- put_task_struct(c);
}
#endif /* CONFIG_MEMCG */
@@ -739,6 +740,13 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
kill_orphaned_pgrp(tsk->group_leader, NULL);
tsk->exit_state = EXIT_ZOMBIE;
+ /*
+ * sub-thread or delay_group_leader(), wake up the
+ * PIDFD_THREAD waiters.
+ */
+ if (!thread_group_empty(tsk))
+ do_notify_pidfd(tsk);
+
if (unlikely(tsk->ptrace)) {
int sig = thread_group_leader(tsk) &&
thread_group_empty(tsk) &&
@@ -769,6 +777,62 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
}
#ifdef CONFIG_DEBUG_STACK_USAGE
+unsigned long stack_not_used(struct task_struct *p)
+{
+ unsigned long *n = end_of_stack(p);
+
+ do { /* Skip over canary */
+# ifdef CONFIG_STACK_GROWSUP
+ n--;
+# else
+ n++;
+# endif
+ } while (!*n);
+
+# ifdef CONFIG_STACK_GROWSUP
+ return (unsigned long)end_of_stack(p) - (unsigned long)n;
+# else
+ return (unsigned long)n - (unsigned long)end_of_stack(p);
+# endif
+}
+
+/* Count the maximum pages reached in kernel stacks */
+static inline void kstack_histogram(unsigned long used_stack)
+{
+#ifdef CONFIG_VM_EVENT_COUNTERS
+ if (used_stack <= 1024)
+ count_vm_event(KSTACK_1K);
+#if THREAD_SIZE > 1024
+ else if (used_stack <= 2048)
+ count_vm_event(KSTACK_2K);
+#endif
+#if THREAD_SIZE > 2048
+ else if (used_stack <= 4096)
+ count_vm_event(KSTACK_4K);
+#endif
+#if THREAD_SIZE > 4096
+ else if (used_stack <= 8192)
+ count_vm_event(KSTACK_8K);
+#endif
+#if THREAD_SIZE > 8192
+ else if (used_stack <= 16384)
+ count_vm_event(KSTACK_16K);
+#endif
+#if THREAD_SIZE > 16384
+ else if (used_stack <= 32768)
+ count_vm_event(KSTACK_32K);
+#endif
+#if THREAD_SIZE > 32768
+ else if (used_stack <= 65536)
+ count_vm_event(KSTACK_64K);
+#endif
+#if THREAD_SIZE > 65536
+ else
+ count_vm_event(KSTACK_REST);
+#endif
+#endif /* CONFIG_VM_EVENT_COUNTERS */
+}
+
static void check_stack_usage(void)
{
static DEFINE_SPINLOCK(low_water_lock);
@@ -776,6 +840,7 @@ static void check_stack_usage(void)
unsigned long free;
free = stack_not_used(current);
+ kstack_histogram(THREAD_SIZE - free);
if (free >= lowest_to_date)
return;
@@ -829,6 +894,8 @@ void __noreturn do_exit(long code)
io_uring_files_cancel();
exit_signals(tsk); /* sets PF_EXITING */
+ seccomp_filter_release(tsk);
+
acct_update_integrals(tsk);
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
@@ -1889,30 +1956,6 @@ Efault:
}
#endif
-/**
- * thread_group_exited - check that a thread group has exited
- * @pid: tgid of thread group to be checked.
- *
- * Test if the thread group represented by tgid has exited (all
- * threads are zombies, dead or completely gone).
- *
- * Return: true if the thread group has exited. false otherwise.
- */
-bool thread_group_exited(struct pid *pid)
-{
- struct task_struct *task;
- bool exited;
-
- rcu_read_lock();
- task = pid_task(pid, PIDTYPE_PID);
- exited = !task ||
- (READ_ONCE(task->exit_state) && thread_group_empty(task));
- rcu_read_unlock();
-
- return exited;
-}
-EXPORT_SYMBOL(thread_group_exited);
-
/*
* This needs to be __function_aligned as GCC implicitly makes any
* implementation of abort() cold and drops alignment specified by