diff options
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r-- | mm/oom_kill.c | 52 |
1 files changed, 29 insertions, 23 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 25923cfec9c6..c145b0feecc1 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * linux/mm/oom_kill.c - * + * * Copyright (C) 1998,2000 Rik van Riel * Thanks go out to Claus Fischer for some serious inspiration and * for goading me into coding this file... @@ -218,7 +218,7 @@ long oom_badness(struct task_struct *p, unsigned long totalpages) */ adj = (long)p->signal->oom_score_adj; if (adj == OOM_SCORE_ADJ_MIN || - test_bit(MMF_OOM_SKIP, &p->mm->flags) || + mm_flags_test(MMF_OOM_SKIP, p->mm) || in_vfork(p)) { task_unlock(p); return LONG_MIN; @@ -325,7 +325,7 @@ static int oom_evaluate_task(struct task_struct *task, void *arg) * any memory is quite low. */ if (!is_sysrq_oom(oc) && tsk_is_oom_victim(task)) { - if (test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags)) + if (mm_flags_test(MMF_OOM_SKIP, task->signal->oom_mm)) goto next; goto abort; } @@ -490,12 +490,12 @@ static bool oom_killer_disabled __read_mostly; * task's threads: if one of those is using this mm then this task was also * using it. */ -bool process_shares_mm(struct task_struct *p, struct mm_struct *mm) +bool process_shares_mm(const struct task_struct *p, const struct mm_struct *mm) { - struct task_struct *t; + const struct task_struct *t; for_each_thread(p, t) { - struct mm_struct *t_mm = READ_ONCE(t->mm); + const struct mm_struct *t_mm = READ_ONCE(t->mm); if (t_mm) return t_mm == mm; } @@ -516,7 +516,7 @@ static bool __oom_reap_task_mm(struct mm_struct *mm) { struct vm_area_struct *vma; bool ret = true; - VMA_ITERATOR(vmi, mm, 0); + MA_STATE(mas, &mm->mm_mt, ULONG_MAX, ULONG_MAX); /* * Tell all users of get_user/copy_from_user etc... that the content @@ -524,9 +524,15 @@ static bool __oom_reap_task_mm(struct mm_struct *mm) * should imply barriers already and the reader would hit a page fault * if it stumbled over a reaped memory. */ - set_bit(MMF_UNSTABLE, &mm->flags); + mm_flags_set(MMF_UNSTABLE, mm); - for_each_vma(vmi, vma) { + /* + * It might start racing with the dying task and compete for shared + * resources - e.g. page table lock contention has been observed. + * Reduce those races by reaping the oom victim from the other end + * of the address space. + */ + mas_for_each_rev(&mas, vma, 0) { if (vma->vm_flags & (VM_HUGETLB|VM_PFNMAP)) continue; @@ -583,7 +589,7 @@ static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) * under mmap_lock for reading because it serializes against the * mmap_write_lock();mmap_write_unlock() cycle in exit_mmap(). */ - if (test_bit(MMF_OOM_SKIP, &mm->flags)) { + if (mm_flags_test(MMF_OOM_SKIP, mm)) { trace_skip_task_reaping(tsk->pid); goto out_unlock; } @@ -619,7 +625,7 @@ static void oom_reap_task(struct task_struct *tsk) schedule_timeout_idle(HZ/10); if (attempts <= MAX_OOM_REAP_RETRIES || - test_bit(MMF_OOM_SKIP, &mm->flags)) + mm_flags_test(MMF_OOM_SKIP, mm)) goto done; pr_info("oom_reaper: unable to reap pid:%d (%s)\n", @@ -634,7 +640,7 @@ done: * Hide this mm from OOM killer because it has been either reaped or * somebody can't call mmap_write_unlock(mm). */ - set_bit(MMF_OOM_SKIP, &mm->flags); + mm_flags_set(MMF_OOM_SKIP, mm); /* Drop a reference taken by queue_oom_reaper */ put_task_struct(tsk); @@ -670,7 +676,7 @@ static void wake_oom_reaper(struct timer_list *timer) unsigned long flags; /* The victim managed to terminate on its own - see exit_mmap */ - if (test_bit(MMF_OOM_SKIP, &mm->flags)) { + if (mm_flags_test(MMF_OOM_SKIP, mm)) { put_task_struct(tsk); return; } @@ -695,7 +701,7 @@ static void wake_oom_reaper(struct timer_list *timer) static void queue_oom_reaper(struct task_struct *tsk) { /* mm is already queued? */ - if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) + if (mm_flags_test_and_set(MMF_OOM_REAP_QUEUED, tsk->signal->oom_mm)) return; get_task_struct(tsk); @@ -772,12 +778,12 @@ static void mark_oom_victim(struct task_struct *tsk) mmgrab(tsk->signal->oom_mm); /* - * Make sure that the task is woken up from uninterruptible sleep - * if it is frozen because OOM killer wouldn't be able to free - * any memory and livelock. freezing_slow_path will tell the freezer - * that TIF_MEMDIE tasks should be ignored. + * Make sure that the process is woken up from uninterruptible sleep + * if it is frozen because OOM killer wouldn't be able to free any + * memory and livelock. The freezer will thaw the tasks that are OOM + * victims regardless of the PM freezing and cgroup freezing states. */ - __thaw_task(tsk); + thaw_process(tsk); atomic_inc(&oom_victims); cred = get_task_cred(tsk); trace_mark_victim(tsk, cred->uid.val); @@ -892,7 +898,7 @@ static bool task_will_free_mem(struct task_struct *task) * This task has already been drained by the oom reaper so there are * only small chances it will free some more */ - if (test_bit(MMF_OOM_SKIP, &mm->flags)) + if (mm_flags_test(MMF_OOM_SKIP, mm)) return false; if (atomic_read(&mm->mm_users) <= 1) @@ -977,7 +983,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message) continue; if (is_global_init(p)) { can_oom_reap = false; - set_bit(MMF_OOM_SKIP, &mm->flags); + mm_flags_set(MMF_OOM_SKIP, mm); pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n", task_pid_nr(victim), victim->comm, task_pid_nr(p), p->comm); @@ -1235,7 +1241,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) reap = true; else { /* Error only if the work has not been done already */ - if (!test_bit(MMF_OOM_SKIP, &mm->flags)) + if (!mm_flags_test(MMF_OOM_SKIP, mm)) ret = -EINVAL; } task_unlock(p); @@ -1251,7 +1257,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) * Check MMF_OOM_SKIP again under mmap_read_lock protection to ensure * possible change in exit_mmap is seen */ - if (!test_bit(MMF_OOM_SKIP, &mm->flags) && !__oom_reap_task_mm(mm)) + if (!mm_flags_test(MMF_OOM_SKIP, mm) && !__oom_reap_task_mm(mm)) ret = -EAGAIN; mmap_read_unlock(mm); |