summaryrefslogtreecommitdiff
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--kernel/fork.c79
1 files changed, 43 insertions, 36 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 947eb1a6399a..5f986a32d0e6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -37,13 +37,13 @@
#include <linux/fdtable.h>
#include <linux/iocontext.h>
#include <linux/key.h>
+#include <linux/kmsan.h>
#include <linux/binfmts.h>
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
-#include <linux/vmacache.h>
#include <linux/nsproxy.h>
#include <linux/capability.h>
#include <linux/cpu.h>
@@ -475,7 +475,6 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
*/
*new = data_race(*orig);
INIT_LIST_HEAD(&new->anon_vma_chain);
- new->vm_next = new->vm_prev = NULL;
dup_anon_vma_name(orig, new);
}
return new;
@@ -580,11 +579,12 @@ static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)
static __latent_entropy int dup_mmap(struct mm_struct *mm,
struct mm_struct *oldmm)
{
- struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
- struct rb_node **rb_link, *rb_parent;
+ struct vm_area_struct *mpnt, *tmp;
int retval;
- unsigned long charge;
+ unsigned long charge = 0;
LIST_HEAD(uf);
+ MA_STATE(old_mas, &oldmm->mm_mt, 0, 0);
+ MA_STATE(mas, &mm->mm_mt, 0, 0);
uprobe_start_dup_mmap();
if (mmap_write_lock_killable(oldmm)) {
@@ -606,16 +606,16 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
mm->exec_vm = oldmm->exec_vm;
mm->stack_vm = oldmm->stack_vm;
- rb_link = &mm->mm_rb.rb_node;
- rb_parent = NULL;
- pprev = &mm->mmap;
retval = ksm_fork(mm, oldmm);
if (retval)
goto out;
khugepaged_fork(mm, oldmm);
- prev = NULL;
- for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
+ retval = mas_expected_entries(&mas, oldmm->map_count);
+ if (retval)
+ goto out;
+
+ mas_for_each(&old_mas, mpnt, ULONG_MAX) {
struct file *file;
if (mpnt->vm_flags & VM_DONTCOPY) {
@@ -629,7 +629,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
*/
if (fatal_signal_pending(current)) {
retval = -EINTR;
- goto out;
+ goto loop_out;
}
if (mpnt->vm_flags & VM_ACCOUNT) {
unsigned long len = vma_pages(mpnt);
@@ -675,24 +675,17 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
}
/*
- * Clear hugetlb-related page reserves for children. This only
- * affects MAP_PRIVATE mappings. Faults generated by the child
- * are not guaranteed to succeed, even if read-only
+ * Copy/update hugetlb private vma information.
*/
if (is_vm_hugetlb_page(tmp))
- reset_vma_resv_huge_pages(tmp);
-
- /*
- * Link in the new vma and copy the page table entries.
- */
- *pprev = tmp;
- pprev = &tmp->vm_next;
- tmp->vm_prev = prev;
- prev = tmp;
+ hugetlb_dup_vma_private(tmp);
- __vma_link_rb(mm, tmp, rb_link, rb_parent);
- rb_link = &tmp->vm_rb.rb_right;
- rb_parent = &tmp->vm_rb;
+ /* Link the vma into the MT */
+ mas.index = tmp->vm_start;
+ mas.last = tmp->vm_end - 1;
+ mas_store(&mas, tmp);
+ if (mas_is_err(&mas))
+ goto fail_nomem_mas_store;
mm->map_count++;
if (!(tmp->vm_flags & VM_WIPEONFORK))
@@ -702,10 +695,12 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
tmp->vm_ops->open(tmp);
if (retval)
- goto out;
+ goto loop_out;
}
/* a new mm has just been created */
retval = arch_dup_mmap(oldmm, mm);
+loop_out:
+ mas_destroy(&mas);
out:
mmap_write_unlock(mm);
flush_tlb_mm(oldmm);
@@ -714,6 +709,9 @@ out:
fail_uprobe_end:
uprobe_end_dup_mmap();
return retval;
+
+fail_nomem_mas_store:
+ unlink_anon_vmas(tmp);
fail_nomem_anon_vma_fork:
mpol_put(vma_policy(tmp));
fail_nomem_policy:
@@ -721,7 +719,7 @@ fail_nomem_policy:
fail_nomem:
retval = -ENOMEM;
vm_unacct_memory(charge);
- goto out;
+ goto loop_out;
}
static inline int mm_alloc_pgd(struct mm_struct *mm)
@@ -1026,6 +1024,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->worker_private = NULL;
kcov_task_init(tsk);
+ kmsan_task_create(tsk);
kmap_local_fork(tsk);
#ifdef CONFIG_FAULT_INJECTION
@@ -1109,9 +1108,8 @@ static void mm_init_uprobes_state(struct mm_struct *mm)
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
struct user_namespace *user_ns)
{
- mm->mmap = NULL;
- mm->mm_rb = RB_ROOT;
- mm->vmacache_seqnum = 0;
+ mt_init_flags(&mm->mm_mt, MM_MT_FLAGS);
+ mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock);
atomic_set(&mm->mm_users, 1);
atomic_set(&mm->mm_count, 1);
seqcount_init(&mm->write_protect_seq);
@@ -1152,6 +1150,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
goto fail_nocontext;
mm->user_ns = get_user_ns(user_ns);
+ lru_gen_init_mm(mm);
return mm;
fail_nocontext:
@@ -1194,6 +1193,7 @@ static inline void __mmput(struct mm_struct *mm)
}
if (mm->binfmt)
module_put(mm->binfmt->module);
+ lru_gen_del_mm(mm);
mmdrop(mm);
}
@@ -1285,13 +1285,16 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
/* Forbid mm->exe_file change if old file still mapped. */
old_exe_file = get_mm_exe_file(mm);
if (old_exe_file) {
+ VMA_ITERATOR(vmi, mm, 0);
mmap_read_lock(mm);
- for (vma = mm->mmap; vma && !ret; vma = vma->vm_next) {
+ for_each_vma(vmi, vma) {
if (!vma->vm_file)
continue;
if (path_equal(&vma->vm_file->f_path,
- &old_exe_file->f_path))
+ &old_exe_file->f_path)) {
ret = -EBUSY;
+ break;
+ }
}
mmap_read_unlock(mm);
fput(old_exe_file);
@@ -1566,9 +1569,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
if (!oldmm)
return 0;
- /* initialize the new vmacache entries */
- vmacache_flush(tsk);
-
if (clone_flags & CLONE_VM) {
mmget(oldmm);
mm = oldmm;
@@ -2693,6 +2693,13 @@ pid_t kernel_clone(struct kernel_clone_args *args)
get_task_struct(p);
}
+ if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) {
+ /* lock the task to synchronize with memcg migration */
+ task_lock(p);
+ lru_gen_add_mm(p->mm);
+ task_unlock(p);
+ }
+
wake_up_new_task(p);
/* forking complete and child started to run, tell ptracer */