summaryrefslogtreecommitdiff
path: root/mm/oom_kill.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r--mm/oom_kill.c52
1 files changed, 29 insertions, 23 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 25923cfec9c6..c145b0feecc1 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/mm/oom_kill.c
- *
+ *
* Copyright (C) 1998,2000 Rik van Riel
* Thanks go out to Claus Fischer for some serious inspiration and
* for goading me into coding this file...
@@ -218,7 +218,7 @@ long oom_badness(struct task_struct *p, unsigned long totalpages)
*/
adj = (long)p->signal->oom_score_adj;
if (adj == OOM_SCORE_ADJ_MIN ||
- test_bit(MMF_OOM_SKIP, &p->mm->flags) ||
+ mm_flags_test(MMF_OOM_SKIP, p->mm) ||
in_vfork(p)) {
task_unlock(p);
return LONG_MIN;
@@ -325,7 +325,7 @@ static int oom_evaluate_task(struct task_struct *task, void *arg)
* any memory is quite low.
*/
if (!is_sysrq_oom(oc) && tsk_is_oom_victim(task)) {
- if (test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags))
+ if (mm_flags_test(MMF_OOM_SKIP, task->signal->oom_mm))
goto next;
goto abort;
}
@@ -490,12 +490,12 @@ static bool oom_killer_disabled __read_mostly;
* task's threads: if one of those is using this mm then this task was also
* using it.
*/
-bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
+bool process_shares_mm(const struct task_struct *p, const struct mm_struct *mm)
{
- struct task_struct *t;
+ const struct task_struct *t;
for_each_thread(p, t) {
- struct mm_struct *t_mm = READ_ONCE(t->mm);
+ const struct mm_struct *t_mm = READ_ONCE(t->mm);
if (t_mm)
return t_mm == mm;
}
@@ -516,7 +516,7 @@ static bool __oom_reap_task_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
bool ret = true;
- VMA_ITERATOR(vmi, mm, 0);
+ MA_STATE(mas, &mm->mm_mt, ULONG_MAX, ULONG_MAX);
/*
* Tell all users of get_user/copy_from_user etc... that the content
@@ -524,9 +524,15 @@ static bool __oom_reap_task_mm(struct mm_struct *mm)
* should imply barriers already and the reader would hit a page fault
* if it stumbled over a reaped memory.
*/
- set_bit(MMF_UNSTABLE, &mm->flags);
+ mm_flags_set(MMF_UNSTABLE, mm);
- for_each_vma(vmi, vma) {
+ /*
+ * It might start racing with the dying task and compete for shared
+ * resources - e.g. page table lock contention has been observed.
+ * Reduce those races by reaping the oom victim from the other end
+ * of the address space.
+ */
+ mas_for_each_rev(&mas, vma, 0) {
if (vma->vm_flags & (VM_HUGETLB|VM_PFNMAP))
continue;
@@ -583,7 +589,7 @@ static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
* under mmap_lock for reading because it serializes against the
* mmap_write_lock();mmap_write_unlock() cycle in exit_mmap().
*/
- if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
+ if (mm_flags_test(MMF_OOM_SKIP, mm)) {
trace_skip_task_reaping(tsk->pid);
goto out_unlock;
}
@@ -619,7 +625,7 @@ static void oom_reap_task(struct task_struct *tsk)
schedule_timeout_idle(HZ/10);
if (attempts <= MAX_OOM_REAP_RETRIES ||
- test_bit(MMF_OOM_SKIP, &mm->flags))
+ mm_flags_test(MMF_OOM_SKIP, mm))
goto done;
pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
@@ -634,7 +640,7 @@ done:
* Hide this mm from OOM killer because it has been either reaped or
* somebody can't call mmap_write_unlock(mm).
*/
- set_bit(MMF_OOM_SKIP, &mm->flags);
+ mm_flags_set(MMF_OOM_SKIP, mm);
/* Drop a reference taken by queue_oom_reaper */
put_task_struct(tsk);
@@ -670,7 +676,7 @@ static void wake_oom_reaper(struct timer_list *timer)
unsigned long flags;
/* The victim managed to terminate on its own - see exit_mmap */
- if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
+ if (mm_flags_test(MMF_OOM_SKIP, mm)) {
put_task_struct(tsk);
return;
}
@@ -695,7 +701,7 @@ static void wake_oom_reaper(struct timer_list *timer)
static void queue_oom_reaper(struct task_struct *tsk)
{
/* mm is already queued? */
- if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
+ if (mm_flags_test_and_set(MMF_OOM_REAP_QUEUED, tsk->signal->oom_mm))
return;
get_task_struct(tsk);
@@ -772,12 +778,12 @@ static void mark_oom_victim(struct task_struct *tsk)
mmgrab(tsk->signal->oom_mm);
/*
- * Make sure that the task is woken up from uninterruptible sleep
- * if it is frozen because OOM killer wouldn't be able to free
- * any memory and livelock. freezing_slow_path will tell the freezer
- * that TIF_MEMDIE tasks should be ignored.
+ * Make sure that the process is woken up from uninterruptible sleep
+ * if it is frozen because OOM killer wouldn't be able to free any
+ * memory and livelock. The freezer will thaw the tasks that are OOM
+ * victims regardless of the PM freezing and cgroup freezing states.
*/
- __thaw_task(tsk);
+ thaw_process(tsk);
atomic_inc(&oom_victims);
cred = get_task_cred(tsk);
trace_mark_victim(tsk, cred->uid.val);
@@ -892,7 +898,7 @@ static bool task_will_free_mem(struct task_struct *task)
* This task has already been drained by the oom reaper so there are
* only small chances it will free some more
*/
- if (test_bit(MMF_OOM_SKIP, &mm->flags))
+ if (mm_flags_test(MMF_OOM_SKIP, mm))
return false;
if (atomic_read(&mm->mm_users) <= 1)
@@ -977,7 +983,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
continue;
if (is_global_init(p)) {
can_oom_reap = false;
- set_bit(MMF_OOM_SKIP, &mm->flags);
+ mm_flags_set(MMF_OOM_SKIP, mm);
pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
task_pid_nr(victim), victim->comm,
task_pid_nr(p), p->comm);
@@ -1235,7 +1241,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
reap = true;
else {
/* Error only if the work has not been done already */
- if (!test_bit(MMF_OOM_SKIP, &mm->flags))
+ if (!mm_flags_test(MMF_OOM_SKIP, mm))
ret = -EINVAL;
}
task_unlock(p);
@@ -1251,7 +1257,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
* Check MMF_OOM_SKIP again under mmap_read_lock protection to ensure
* possible change in exit_mmap is seen
*/
- if (!test_bit(MMF_OOM_SKIP, &mm->flags) && !__oom_reap_task_mm(mm))
+ if (!mm_flags_test(MMF_OOM_SKIP, mm) && !__oom_reap_task_mm(mm))
ret = -EAGAIN;
mmap_read_unlock(mm);