diff options
Diffstat (limited to 'mm/mmap.c')
-rw-r--r-- | mm/mmap.c | 229 |
1 files changed, 144 insertions, 85 deletions
diff --git a/mm/mmap.c b/mm/mmap.c index dd4b35a25aeb..79d541f1502b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -344,7 +344,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, * to. we assume access permissions have been handled by the open * of the memory object, so we don't do any here. */ - vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | + vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(file, flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; /* Obtain the address to map to. we verify (or select) it and ensure @@ -900,7 +900,8 @@ __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, if (get_area) { addr = get_area(file, addr, len, pgoff, flags); - } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) + && IS_ALIGNED(len, PMD_SIZE)) { /* Ensures that larger anonymous mappings are THP aligned. */ addr = thp_get_unmapped_area_vmflags(file, addr, len, pgoff, flags, vm_flags); @@ -1357,21 +1358,19 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, return do_vmi_munmap(&vmi, mm, start, len, uf, false); } -unsigned long mmap_region(struct file *file, unsigned long addr, +static unsigned long __mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, struct list_head *uf) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma = NULL; pgoff_t pglen = PHYS_PFN(len); - struct vm_area_struct *merge; unsigned long charged = 0; struct vma_munmap_struct vms; struct ma_state mas_detach; struct maple_tree mt_detach; unsigned long end = addr + len; - bool writable_file_mapping = false; - int error = -ENOMEM; + int error; VMA_ITERATOR(vmi, mm, addr); VMG_STATE(vmg, mm, &vmi, addr, end, vm_flags, pgoff); @@ -1396,8 +1395,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr, } /* Check against address space limit. */ - if (!may_expand_vm(mm, vm_flags, pglen - vms.nr_pages)) + if (!may_expand_vm(mm, vm_flags, pglen - vms.nr_pages)) { + error = -ENOMEM; goto abort_munmap; + } /* * Private writable mapping: check memory availability @@ -1405,14 +1406,24 @@ unsigned long mmap_region(struct file *file, unsigned long addr, if (accountable_mapping(file, vm_flags)) { charged = pglen; charged -= vms.nr_accounted; - if (charged && security_vm_enough_memory_mm(mm, charged)) - goto abort_munmap; + if (charged) { + error = security_vm_enough_memory_mm(mm, charged); + if (error) + goto abort_munmap; + } vms.nr_accounted = 0; vm_flags |= VM_ACCOUNT; vmg.flags = vm_flags; } + /* + * clear PTEs while the vma is still in the tree so that rmap + * cannot race with the freeing later in the truncate scenario. + * This is also needed for mmap_file(), which is why vm_ops + * close function is called. + */ + vms_clean_up_area(&vms, &mas_detach); vma = vma_merge_new_range(&vmg); if (vma) goto expanded; @@ -1422,47 +1433,45 @@ unsigned long mmap_region(struct file *file, unsigned long addr, * not unmapped, but the maps are removed from the list. */ vma = vm_area_alloc(mm); - if (!vma) + if (!vma) { + error = -ENOMEM; goto unacct_error; + } vma_iter_config(&vmi, addr, end); vma_set_range(vma, addr, end, pgoff); vm_flags_init(vma, vm_flags); vma->vm_page_prot = vm_get_page_prot(vm_flags); + if (vma_iter_prealloc(&vmi, vma)) { + error = -ENOMEM; + goto free_vma; + } + if (file) { vma->vm_file = get_file(file); - /* - * call_mmap() may map PTE, so ensure there are no existing PTEs - * and call the vm_ops close function if one exists. - */ - vms_clean_up_area(&vms, &mas_detach); - error = call_mmap(file, vma); + error = mmap_file(file, vma); if (error) - goto unmap_and_free_vma; - - if (vma_is_shared_maywrite(vma)) { - error = mapping_map_writable(file->f_mapping); - if (error) - goto close_and_free_vma; - - writable_file_mapping = true; - } + goto unmap_and_free_file_vma; + /* Drivers cannot alter the address of the VMA. */ + WARN_ON_ONCE(addr != vma->vm_start); /* - * Expansion is handled above, merging is handled below. - * Drivers should not alter the address of the VMA. + * Drivers should not permit writability when previously it was + * disallowed. */ - error = -EINVAL; - if (WARN_ON((addr != vma->vm_start))) - goto close_and_free_vma; + VM_WARN_ON_ONCE(vm_flags != vma->vm_flags && + !(vm_flags & VM_MAYWRITE) && + (vma->vm_flags & VM_MAYWRITE)); vma_iter_config(&vmi, addr, end); /* - * If vm_flags changed after call_mmap(), we should try merge + * If vm_flags changed after mmap_file(), we should try merge * vma again as we may succeed this time. */ if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) { + struct vm_area_struct *merge; + vmg.flags = vma->vm_flags; /* If this fails, state is reset ready for a reattempt. */ merge = vma_merge_new_range(&vmg); @@ -1480,7 +1489,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, vma = merge; /* Update vm_flags to pick up the change. */ vm_flags = vma->vm_flags; - goto unmap_writable; + goto file_expanded; } vma_iter_config(&vmi, addr, end); } @@ -1489,24 +1498,15 @@ unsigned long mmap_region(struct file *file, unsigned long addr, } else if (vm_flags & VM_SHARED) { error = shmem_zero_setup(vma); if (error) - goto free_vma; + goto free_iter_vma; } else { vma_set_anonymous(vma); } - if (map_deny_write_exec(vma, vma->vm_flags)) { - error = -EACCES; - goto close_and_free_vma; - } - - /* Allow architectures to sanity-check the vm_flags */ - error = -EINVAL; - if (!arch_validate_flags(vma->vm_flags)) - goto close_and_free_vma; - - error = -ENOMEM; - if (vma_iter_prealloc(&vmi, vma)) - goto close_and_free_vma; +#ifdef CONFIG_SPARC64 + /* TODO: Fix SPARC ADI! */ + WARN_ON_ONCE(!arch_validate_flags(vm_flags)); +#endif /* Lock the VMA since it is modified after insertion into VMA tree */ vma_start_write(vma); @@ -1520,10 +1520,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, */ khugepaged_enter_vma(vma, vma->vm_flags); - /* Once vma denies write, undo our temporary denial count */ -unmap_writable: - if (writable_file_mapping) - mapping_unmap_writable(file->f_mapping); +file_expanded: file = vma->vm_file; ksm_add_vma(vma); expanded: @@ -1556,24 +1553,17 @@ expanded: vma_set_page_prot(vma); - validate_mm(mm); return addr; -close_and_free_vma: - if (file && !vms.closed_vm_ops && vma->vm_ops && vma->vm_ops->close) - vma->vm_ops->close(vma); - - if (file || vma->vm_file) { -unmap_and_free_vma: - fput(vma->vm_file); - vma->vm_file = NULL; +unmap_and_free_file_vma: + fput(vma->vm_file); + vma->vm_file = NULL; - vma_iter_set(&vmi, vma->vm_end); - /* Undo any partial mapping done by a device driver. */ - unmap_region(&vmi.mas, vma, vmg.prev, vmg.next); - } - if (writable_file_mapping) - mapping_unmap_writable(file->f_mapping); + vma_iter_set(&vmi, vma->vm_end); + /* Undo any partial mapping done by a device driver. */ + unmap_region(&vmi.mas, vma, vmg.prev, vmg.next); +free_iter_vma: + vma_iter_free(&vmi); free_vma: vm_area_free(vma); unacct_error: @@ -1583,10 +1573,43 @@ unacct_error: abort_munmap: vms_abort_munmap_vmas(&vms, &mas_detach); gather_failed: - validate_mm(mm); return error; } +unsigned long mmap_region(struct file *file, unsigned long addr, + unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, + struct list_head *uf) +{ + unsigned long ret; + bool writable_file_mapping = false; + + /* Check to see if MDWE is applicable. */ + if (map_deny_write_exec(vm_flags, vm_flags)) + return -EACCES; + + /* Allow architectures to sanity-check the vm_flags. */ + if (!arch_validate_flags(vm_flags)) + return -EINVAL; + + /* Map writable and ensure this isn't a sealed memfd. */ + if (file && is_shared_maywrite(vm_flags)) { + int error = mapping_map_writable(file->f_mapping); + + if (error) + return error; + writable_file_mapping = true; + } + + ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf); + + /* Clear our write mapping regardless of error. */ + if (writable_file_mapping) + mapping_unmap_writable(file->f_mapping); + + validate_mm(current->mm); + return ret; +} + static int __vm_munmap(unsigned long start, size_t len, bool unlock) { int ret; @@ -1630,6 +1653,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long populate = 0; unsigned long ret = -EINVAL; struct file *file; + vm_flags_t vm_flags; pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/mm/remap_file_pages.rst.\n", current->comm, current->pid); @@ -1646,12 +1670,60 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, if (pgoff + (size >> PAGE_SHIFT) < pgoff) return ret; - if (mmap_write_lock_killable(mm)) + if (mmap_read_lock_killable(mm)) + return -EINTR; + + /* + * Look up VMA under read lock first so we can perform the security + * without holding locks (which can be problematic). We reacquire a + * write lock later and check nothing changed underneath us. + */ + vma = vma_lookup(mm, start); + + if (!vma || !(vma->vm_flags & VM_SHARED)) { + mmap_read_unlock(mm); + return -EINVAL; + } + + prot |= vma->vm_flags & VM_READ ? PROT_READ : 0; + prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0; + prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0; + + flags &= MAP_NONBLOCK; + flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; + if (vma->vm_flags & VM_LOCKED) + flags |= MAP_LOCKED; + + /* Save vm_flags used to calculate prot and flags, and recheck later. */ + vm_flags = vma->vm_flags; + file = get_file(vma->vm_file); + + mmap_read_unlock(mm); + + /* Call outside mmap_lock to be consistent with other callers. */ + ret = security_mmap_file(file, prot, flags); + if (ret) { + fput(file); + return ret; + } + + ret = -EINVAL; + + /* OK security check passed, take write lock + let it rip. */ + if (mmap_write_lock_killable(mm)) { + fput(file); return -EINTR; + } vma = vma_lookup(mm, start); - if (!vma || !(vma->vm_flags & VM_SHARED)) + if (!vma) + goto out; + + /* Make sure things didn't change under us. */ + if (vma->vm_flags != vm_flags) + goto out; + if (vma->vm_file != file) goto out; if (start + size > vma->vm_end) { @@ -1679,25 +1751,11 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, goto out; } - prot |= vma->vm_flags & VM_READ ? PROT_READ : 0; - prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0; - prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0; - - flags &= MAP_NONBLOCK; - flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; - if (vma->vm_flags & VM_LOCKED) - flags |= MAP_LOCKED; - - file = get_file(vma->vm_file); - ret = security_mmap_file(vma->vm_file, prot, flags); - if (ret) - goto out_fput; ret = do_mmap(vma->vm_file, start, size, prot, flags, 0, pgoff, &populate, NULL); -out_fput: - fput(file); out: mmap_write_unlock(mm); + fput(file); if (populate) mm_populate(ret, populate); if (!IS_ERR_VALUE(ret)) @@ -1744,7 +1802,8 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, VMG_STATE(vmg, mm, vmi, addr, addr + len, flags, PHYS_PFN(addr)); vmg.prev = vma; - vma_iter_next_range(vmi); + /* vmi is positioned at prev, which this mode expects. */ + vmg.merge_flags = VMG_FLAG_JUST_EXPAND; if (vma_merge_new_range(&vmg)) goto out; @@ -1885,7 +1944,7 @@ void exit_mmap(struct mm_struct *mm) do { if (vma->vm_flags & VM_ACCOUNT) nr_accounted += vma_pages(vma); - remove_vma(vma, /* unreachable = */ true, /* closed = */ false); + remove_vma(vma, /* unreachable = */ true); count++; cond_resched(); vma = vma_next(&vmi); |