From bebeb3d68b24bb4132d452c5707fe321208bcbcd Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 22 Feb 2013 16:32:37 -0800 Subject: mm: introduce mm_populate() for populating new vmas When creating new mappings using the MAP_POPULATE / MAP_LOCKED flags (or with MCL_FUTURE in effect), we want to populate the pages within the newly created vmas. This may take a while as we may have to read pages from disk, so ideally we want to do this outside of the write-locked mmap_sem region. This change introduces mm_populate(), which is used to defer populating such mappings until after the mmap_sem write lock has been released. This is implemented as a generalization of the former do_mlock_pages(), which accomplished the same task but was using during mlock() / mlockall(). Signed-off-by: Michel Lespinasse Reported-by: Andy Lutomirski Acked-by: Rik van Riel Tested-by: Andy Lutomirski Cc: Greg Ungerer Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mlock.c | 17 +++++++++++------ mm/mmap.c | 20 +++++++++++++++----- mm/nommu.c | 5 ++++- mm/util.c | 6 +++++- 4 files changed, 35 insertions(+), 13 deletions(-) (limited to 'mm') diff --git a/mm/mlock.c b/mm/mlock.c index c9bd528b01d2..a296a49865df 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -416,7 +416,14 @@ static int do_mlock(unsigned long start, size_t len, int on) return error; } -static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors) +/* + * __mm_populate - populate and/or mlock pages within a range of address space. + * + * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap + * flags. VMAs must be already marked with the desired vm_flags, and + * mmap_sem must not be held. + */ +int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) { struct mm_struct *mm = current->mm; unsigned long end, nstart, nend; @@ -498,7 +505,7 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) error = do_mlock(start, len, 1); up_write(¤t->mm->mmap_sem); if (!error) - error = do_mlock_pages(start, len, 0); + error = __mm_populate(start, len, 0); return error; } @@ -564,10 +571,8 @@ SYSCALL_DEFINE1(mlockall, int, flags) capable(CAP_IPC_LOCK)) ret = do_mlockall(flags); up_write(¤t->mm->mmap_sem); - if (!ret && (flags & MCL_CURRENT)) { - /* Ignore errors */ - do_mlock_pages(0, TASK_SIZE, 1); - } + if (!ret && (flags & MCL_CURRENT)) + mm_populate(0, TASK_SIZE); out: return ret; } diff --git a/mm/mmap.c b/mm/mmap.c index 09da0b264982..9b12e3047a86 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1154,12 +1154,15 @@ static inline unsigned long round_hint_to_min(unsigned long hint) unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long pgoff) + unsigned long flags, unsigned long pgoff, + bool *populate) { struct mm_struct * mm = current->mm; struct inode *inode; vm_flags_t vm_flags; + *populate = false; + /* * Does the application expect PROT_READ to imply PROT_EXEC? * @@ -1280,7 +1283,12 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } } - return mmap_region(file, addr, len, flags, vm_flags, pgoff); + addr = mmap_region(file, addr, len, flags, vm_flags, pgoff); + if (!IS_ERR_VALUE(addr) && + ((vm_flags & VM_LOCKED) || + (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE)) + *populate = true; + return addr; } SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, @@ -1531,10 +1539,12 @@ out: vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { - if (!mlock_vma_pages_range(vma, addr, addr + len)) + if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || + vma == get_gate_vma(current->mm))) mm->locked_vm += (len >> PAGE_SHIFT); - } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) - make_pages_present(addr, addr + len); + else + vma->vm_flags &= ~VM_LOCKED; + } if (file) uprobe_mmap(vma); diff --git a/mm/nommu.c b/mm/nommu.c index b20db4e22263..7296a5a280e7 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1250,7 +1250,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long pgoff) + unsigned long pgoff, + bool *populate) { struct vm_area_struct *vma; struct vm_region *region; @@ -1260,6 +1261,8 @@ unsigned long do_mmap_pgoff(struct file *file, kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff); + *populate = false; + /* decide whether we should attempt the mapping, and if so what sort of * mapping */ ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, diff --git a/mm/util.c b/mm/util.c index c55e26b17d93..13467e043e9e 100644 --- a/mm/util.c +++ b/mm/util.c @@ -355,12 +355,16 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, { unsigned long ret; struct mm_struct *mm = current->mm; + bool populate; ret = security_mmap_file(file, prot, flag); if (!ret) { down_write(&mm->mmap_sem); - ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff); + ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff, + &populate); up_write(&mm->mmap_sem); + if (!IS_ERR_VALUE(ret) && populate) + mm_populate(ret, len); } return ret; } -- cgit