summaryrefslogtreecommitdiff
path: root/mm/mremap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mremap.c')
-rw-r--r--mm/mremap.c625
1 files changed, 410 insertions, 215 deletions
diff --git a/mm/mremap.c b/mm/mremap.c
index 60f6b8d0d5f0..e15cf2e444c7 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -52,7 +52,7 @@ struct vma_remap_struct {
unsigned long addr; /* User-specified address from which we remap. */
unsigned long old_len; /* Length of range being remapped. */
unsigned long new_len; /* Desired new length of mapping. */
- unsigned long flags; /* user-specified MREMAP_* flags. */
+ const unsigned long flags; /* user-specified MREMAP_* flags. */
unsigned long new_addr; /* Optionally, desired new address. */
/* uffd state. */
@@ -65,10 +65,11 @@ struct vma_remap_struct {
/* Internal state, determined in do_mremap(). */
unsigned long delta; /* Absolute delta of old_len,new_len. */
- bool mlocked; /* Was the VMA mlock()'d? */
+ bool populate_expand; /* mlock()'d expanded, must populate. */
enum mremap_type remap_type; /* expand, shrink, etc. */
bool mmap_locked; /* Is mm currently write-locked? */
unsigned long charged; /* If VM_ACCOUNT, # pages to account. */
+ bool vmi_needs_invalidate; /* Is the VMA iterator invalidated? */
};
static pud_t *get_old_pud(struct mm_struct *mm, unsigned long addr)
@@ -170,13 +171,29 @@ static pte_t move_soft_dirty_pte(pte_t pte)
return pte;
}
+static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t pte, int max_nr)
+{
+ struct folio *folio;
+
+ if (max_nr == 1)
+ return 1;
+
+ folio = vm_normal_folio(vma, addr, pte);
+ if (!folio || !folio_test_large(folio))
+ return 1;
+
+ return folio_pte_batch(folio, ptep, pte, max_nr);
+}
+
static int move_ptes(struct pagetable_move_control *pmc,
unsigned long extent, pmd_t *old_pmd, pmd_t *new_pmd)
{
struct vm_area_struct *vma = pmc->old;
bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
struct mm_struct *mm = vma->vm_mm;
- pte_t *old_pte, *new_pte, pte;
+ pte_t *old_ptep, *new_ptep;
+ pte_t old_pte, pte;
pmd_t dummy_pmdval;
spinlock_t *old_ptl, *new_ptl;
bool force_flush = false;
@@ -184,6 +201,8 @@ static int move_ptes(struct pagetable_move_control *pmc,
unsigned long new_addr = pmc->new_addr;
unsigned long old_end = old_addr + extent;
unsigned long len = old_end - old_addr;
+ int max_nr_ptes;
+ int nr_ptes;
int err = 0;
/*
@@ -211,8 +230,8 @@ static int move_ptes(struct pagetable_move_control *pmc,
* We don't have to worry about the ordering of src and dst
* pte locks because exclusive mmap_lock prevents deadlock.
*/
- old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
- if (!old_pte) {
+ old_ptep = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
+ if (!old_ptep) {
err = -EAGAIN;
goto out;
}
@@ -223,10 +242,10 @@ static int move_ptes(struct pagetable_move_control *pmc,
* mmap_lock, so this new_pte page is stable, so there is no need to get
* pmdval and do pmd_same() check.
*/
- new_pte = pte_offset_map_rw_nolock(mm, new_pmd, new_addr, &dummy_pmdval,
+ new_ptep = pte_offset_map_rw_nolock(mm, new_pmd, new_addr, &dummy_pmdval,
&new_ptl);
- if (!new_pte) {
- pte_unmap_unlock(old_pte, old_ptl);
+ if (!new_ptep) {
+ pte_unmap_unlock(old_ptep, old_ptl);
err = -EAGAIN;
goto out;
}
@@ -235,14 +254,16 @@ static int move_ptes(struct pagetable_move_control *pmc,
flush_tlb_batched_pending(vma->vm_mm);
arch_enter_lazy_mmu_mode();
- for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
- new_pte++, new_addr += PAGE_SIZE) {
- VM_WARN_ON_ONCE(!pte_none(*new_pte));
+ for (; old_addr < old_end; old_ptep += nr_ptes, old_addr += nr_ptes * PAGE_SIZE,
+ new_ptep += nr_ptes, new_addr += nr_ptes * PAGE_SIZE) {
+ VM_WARN_ON_ONCE(!pte_none(*new_ptep));
- if (pte_none(ptep_get(old_pte)))
+ nr_ptes = 1;
+ max_nr_ptes = (old_end - old_addr) >> PAGE_SHIFT;
+ old_pte = ptep_get(old_ptep);
+ if (pte_none(old_pte))
continue;
- pte = ptep_get_and_clear(mm, old_addr, old_pte);
/*
* If we are remapping a valid PTE, make sure
* to flush TLB before we drop the PTL for the
@@ -254,13 +275,17 @@ static int move_ptes(struct pagetable_move_control *pmc,
* the TLB entry for the old mapping has been
* flushed.
*/
- if (pte_present(pte))
+ if (pte_present(old_pte)) {
+ nr_ptes = mremap_folio_pte_batch(vma, old_addr, old_ptep,
+ old_pte, max_nr_ptes);
force_flush = true;
+ }
+ pte = get_and_clear_full_ptes(mm, old_addr, old_ptep, nr_ptes, 0);
pte = move_pte(pte, old_addr, new_addr);
pte = move_soft_dirty_pte(pte);
if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
- pte_clear(mm, new_addr, new_pte);
+ pte_clear(mm, new_addr, new_ptep);
else {
if (need_clear_uffd_wp) {
if (pte_present(pte))
@@ -268,7 +293,7 @@ static int move_ptes(struct pagetable_move_control *pmc,
else if (is_swap_pte(pte))
pte = pte_swp_clear_uffd_wp(pte);
}
- set_pte_at(mm, new_addr, new_pte, pte);
+ set_ptes(mm, new_addr, new_ptep, pte, nr_ptes);
}
}
@@ -277,8 +302,8 @@ static int move_ptes(struct pagetable_move_control *pmc,
flush_tlb_range(vma, old_end - len, old_end);
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
- pte_unmap(new_pte - 1);
- pte_unmap_unlock(old_pte - 1, old_ptl);
+ pte_unmap(new_ptep - 1);
+ pte_unmap_unlock(old_ptep - 1, old_ptl);
out:
if (pmc->need_rmap_locks)
drop_rmap_locks(vma);
@@ -794,7 +819,7 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc)
new_pud = alloc_new_pud(mm, pmc->new_addr);
if (!new_pud)
break;
- if (pud_trans_huge(*old_pud) || pud_devmap(*old_pud)) {
+ if (pud_trans_huge(*old_pud)) {
if (extent == HPAGE_PUD_SIZE) {
move_pgt_entry(pmc, HPAGE_PUD, old_pud, new_pud);
/* We ignore and continue on error? */
@@ -813,8 +838,7 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc)
if (!new_pmd)
break;
again:
- if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd) ||
- pmd_devmap(*old_pmd)) {
+ if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd)) {
if (extent == HPAGE_PMD_SIZE &&
move_pgt_entry(pmc, HPAGE_PMD, old_pmd, new_pmd))
continue;
@@ -886,7 +910,11 @@ static bool vrm_overlaps(struct vma_remap_struct *vrm)
return false;
}
-/* Do the mremap() flags require that the new_addr parameter be specified? */
+/*
+ * Will a new address definitely be assigned? This either if the user specifies
+ * it via MREMAP_FIXED, or if MREMAP_DONTUNMAP is used, indicating we will
+ * always detemrine a target address.
+ */
static bool vrm_implies_new_addr(struct vma_remap_struct *vrm)
{
return vrm->flags & (MREMAP_FIXED | MREMAP_DONTUNMAP);
@@ -932,7 +960,7 @@ static unsigned long vrm_set_new_addr(struct vma_remap_struct *vrm)
*
* Returns true on success, false if insufficient memory to charge.
*/
-static bool vrm_charge(struct vma_remap_struct *vrm)
+static bool vrm_calc_charge(struct vma_remap_struct *vrm)
{
unsigned long charged;
@@ -983,10 +1011,8 @@ static void vrm_stat_account(struct vma_remap_struct *vrm,
struct vm_area_struct *vma = vrm->vma;
vm_stat_account(mm, vma->vm_flags, pages);
- if (vma->vm_flags & VM_LOCKED) {
+ if (vma->vm_flags & VM_LOCKED)
mm->locked_vm += pages;
- vrm->mlocked = true;
- }
}
/*
@@ -999,7 +1025,7 @@ static unsigned long prep_move_vma(struct vma_remap_struct *vrm)
struct vm_area_struct *vma = vrm->vma;
unsigned long old_addr = vrm->addr;
unsigned long old_len = vrm->old_len;
- unsigned long dummy = vma->vm_flags;
+ vm_flags_t dummy = vma->vm_flags;
/*
* We'd prefer to avoid failure later on in do_munmap:
@@ -1086,6 +1112,7 @@ static void unmap_source_vma(struct vma_remap_struct *vrm)
err = do_vmi_munmap(&vmi, mm, addr, len, vrm->uf_unmap, /* unlock= */false);
vrm->vma = NULL; /* Invalidated. */
+ vrm->vmi_needs_invalidate = true;
if (err) {
/* OOM: unable to split vma, just get accounts right */
vm_acct_memory(len >> PAGE_SHIFT);
@@ -1161,6 +1188,10 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm,
*new_vma_ptr = NULL;
return -ENOMEM;
}
+ /* By merging, we may have invalidated any iterator in use. */
+ if (vma != vrm->vma)
+ vrm->vmi_needs_invalidate = true;
+
vrm->vma = vma;
pmc.old = vma;
pmc.new = new_vma;
@@ -1237,8 +1268,11 @@ static unsigned long move_vma(struct vma_remap_struct *vrm)
if (err)
return err;
- /* If accounted, charge the number of bytes the operation will use. */
- if (!vrm_charge(vrm))
+ /*
+ * If accounted, determine the number of bytes the operation will
+ * charge.
+ */
+ if (!vrm_calc_charge(vrm))
return -ENOMEM;
/* We don't want racing faults. */
@@ -1277,64 +1311,6 @@ static unsigned long move_vma(struct vma_remap_struct *vrm)
}
/*
- * resize_is_valid() - Ensure the vma can be resized to the new length at the give
- * address.
- *
- * Return 0 on success, error otherwise.
- */
-static int resize_is_valid(struct vma_remap_struct *vrm)
-{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma = vrm->vma;
- unsigned long addr = vrm->addr;
- unsigned long old_len = vrm->old_len;
- unsigned long new_len = vrm->new_len;
- unsigned long pgoff;
-
- /*
- * !old_len is a special case where an attempt is made to 'duplicate'
- * a mapping. This makes no sense for private mappings as it will
- * instead create a fresh/new mapping unrelated to the original. This
- * is contrary to the basic idea of mremap which creates new mappings
- * based on the original. There are no known use cases for this
- * behavior. As a result, fail such attempts.
- */
- if (!old_len && !(vma->vm_flags & (VM_SHARED | VM_MAYSHARE))) {
- pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap. This is not supported.\n",
- current->comm, current->pid);
- return -EINVAL;
- }
-
- if ((vrm->flags & MREMAP_DONTUNMAP) &&
- (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)))
- return -EINVAL;
-
- /* We can't remap across vm area boundaries */
- if (old_len > vma->vm_end - addr)
- return -EFAULT;
-
- if (new_len == old_len)
- return 0;
-
- /* Need to be careful about a growing mapping */
- pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
- pgoff += vma->vm_pgoff;
- if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
- return -EINVAL;
-
- if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
- return -EFAULT;
-
- if (!mlock_future_ok(mm, vma->vm_flags, vrm->delta))
- return -EAGAIN;
-
- if (!may_expand_vm(mm, vma->vm_flags, vrm->delta >> PAGE_SHIFT))
- return -ENOMEM;
-
- return 0;
-}
-
-/*
* The user has requested that the VMA be shrunk (i.e., old_len > new_len), so
* execute this, optionally dropping the mmap lock when we do so.
*
@@ -1383,14 +1359,6 @@ static unsigned long mremap_to(struct vma_remap_struct *vrm)
struct mm_struct *mm = current->mm;
unsigned long err;
- /* Is the new length or address silly? */
- if (vrm->new_len > TASK_SIZE ||
- vrm->new_addr > TASK_SIZE - vrm->new_len)
- return -EINVAL;
-
- if (vrm_overlaps(vrm))
- return -EINVAL;
-
if (vrm->flags & MREMAP_FIXED) {
/*
* In mremap_to().
@@ -1400,6 +1368,7 @@ static unsigned long mremap_to(struct vma_remap_struct *vrm)
err = do_munmap(mm, vrm->new_addr, vrm->new_len,
vrm->uf_unmap_early);
vrm->vma = NULL; /* Invalidated. */
+ vrm->vmi_needs_invalidate = true;
if (err)
return err;
@@ -1421,10 +1390,6 @@ static unsigned long mremap_to(struct vma_remap_struct *vrm)
vrm->old_len = vrm->new_len;
}
- err = resize_is_valid(vrm);
- if (err)
- return err;
-
/* MREMAP_DONTUNMAP expands by old_len since old_len == new_len */
if (vrm->flags & MREMAP_DONTUNMAP) {
vm_flags_t vm_flags = vrm->vma->vm_flags;
@@ -1473,68 +1438,6 @@ static bool vrm_can_expand_in_place(struct vma_remap_struct *vrm)
}
/*
- * Are the parameters passed to mremap() valid? If so return 0, otherwise return
- * error.
- */
-static unsigned long check_mremap_params(struct vma_remap_struct *vrm)
-
-{
- unsigned long addr = vrm->addr;
- unsigned long flags = vrm->flags;
-
- /* Ensure no unexpected flag values. */
- if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
- return -EINVAL;
-
- /* Start address must be page-aligned. */
- if (offset_in_page(addr))
- return -EINVAL;
-
- /*
- * We allow a zero old-len as a special case
- * for DOS-emu "duplicate shm area" thing. But
- * a zero new-len is nonsensical.
- */
- if (!PAGE_ALIGN(vrm->new_len))
- return -EINVAL;
-
- /* Remainder of checks are for cases with specific new_addr. */
- if (!vrm_implies_new_addr(vrm))
- return 0;
-
- /* The new address must be page-aligned. */
- if (offset_in_page(vrm->new_addr))
- return -EINVAL;
-
- /* A fixed address implies a move. */
- if (!(flags & MREMAP_MAYMOVE))
- return -EINVAL;
-
- /* MREMAP_DONTUNMAP does not allow resizing in the process. */
- if (flags & MREMAP_DONTUNMAP && vrm->old_len != vrm->new_len)
- return -EINVAL;
-
- /*
- * move_vma() need us to stay 4 maps below the threshold, otherwise
- * it will bail out at the very beginning.
- * That is a problem if we have already unmaped the regions here
- * (new_addr, and old_addr), because userspace will not know the
- * state of the vma's after it gets -ENOMEM.
- * So, to avoid such scenario we can pre-compute if the whole
- * operation has high chances to success map-wise.
- * Worst-scenario case is when both vma's (new_addr and old_addr) get
- * split in 3 before unmapping it.
- * That means 2 more maps (1 for each) to the ones we already hold.
- * Check whether current map count plus 2 still leads us to 4 maps below
- * the threshold, otherwise return -ENOMEM here to be more safe.
- */
- if ((current->mm->map_count + 2) >= sysctl_max_map_count - 3)
- return -ENOMEM;
-
- return 0;
-}
-
-/*
* We know we can expand the VMA in-place by delta pages, so do so.
*
* If we discover the VMA is locked, update mm_struct statistics accordingly and
@@ -1546,7 +1449,7 @@ static unsigned long expand_vma_in_place(struct vma_remap_struct *vrm)
struct vm_area_struct *vma = vrm->vma;
VMA_ITERATOR(vmi, mm, vma->vm_end);
- if (!vrm_charge(vrm))
+ if (!vrm_calc_charge(vrm))
return -ENOMEM;
/*
@@ -1590,8 +1493,6 @@ static bool align_hugetlb(struct vma_remap_struct *vrm)
if (vrm->new_len > vrm->old_len)
return false;
- vrm_set_delta(vrm);
-
return true;
}
@@ -1605,11 +1506,6 @@ static bool align_hugetlb(struct vma_remap_struct *vrm)
static unsigned long expand_vma(struct vma_remap_struct *vrm)
{
unsigned long err;
- unsigned long addr = vrm->addr;
-
- err = resize_is_valid(vrm);
- if (err)
- return err;
/*
* [addr, old_len) spans precisely to the end of the VMA, so try to
@@ -1620,16 +1516,8 @@ static unsigned long expand_vma(struct vma_remap_struct *vrm)
if (err)
return err;
- /*
- * We want to populate the newly expanded portion of the VMA to
- * satisfy the expectation that mlock()'ing a VMA maintains all
- * of its pages in memory.
- */
- if (vrm->mlocked)
- vrm->new_addr = addr;
-
/* OK we're done! */
- return addr;
+ return vrm->addr;
}
/*
@@ -1680,64 +1568,371 @@ static unsigned long mremap_at(struct vma_remap_struct *vrm)
return expand_vma(vrm);
}
- BUG();
+ /* Should not be possible. */
+ WARN_ON_ONCE(1);
+ return -EINVAL;
}
-static unsigned long do_mremap(struct vma_remap_struct *vrm)
+/*
+ * Will this operation result in the VMA being expanded or moved and thus need
+ * to map a new portion of virtual address space?
+ */
+static bool vrm_will_map_new(struct vma_remap_struct *vrm)
+{
+ if (vrm->remap_type == MREMAP_EXPAND)
+ return true;
+
+ if (vrm_implies_new_addr(vrm))
+ return true;
+
+ return false;
+}
+
+/* Does this remap ONLY move mappings? */
+static bool vrm_move_only(struct vma_remap_struct *vrm)
+{
+ if (!(vrm->flags & MREMAP_FIXED))
+ return false;
+
+ if (vrm->old_len != vrm->new_len)
+ return false;
+
+ return true;
+}
+
+static void notify_uffd(struct vma_remap_struct *vrm, bool failed)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long ret;
- ret = check_mremap_params(vrm);
- if (ret)
- return ret;
+ /* Regardless of success/failure, we always notify of any unmaps. */
+ userfaultfd_unmap_complete(mm, vrm->uf_unmap_early);
+ if (failed)
+ mremap_userfaultfd_fail(vrm->uf);
+ else
+ mremap_userfaultfd_complete(vrm->uf, vrm->addr,
+ vrm->new_addr, vrm->old_len);
+ userfaultfd_unmap_complete(mm, vrm->uf_unmap);
+}
- vrm->old_len = PAGE_ALIGN(vrm->old_len);
- vrm->new_len = PAGE_ALIGN(vrm->new_len);
- vrm_set_delta(vrm);
+static bool vma_multi_allowed(struct vm_area_struct *vma)
+{
+ struct file *file;
- if (mmap_write_lock_killable(mm))
- return -EINTR;
- vrm->mmap_locked = true;
+ /*
+ * We can't support moving multiple uffd VMAs as notify requires
+ * mmap lock to be dropped.
+ */
+ if (userfaultfd_armed(vma))
+ return false;
- vma = vrm->vma = vma_lookup(mm, vrm->addr);
- if (!vma) {
- ret = -EFAULT;
- goto out;
+ /*
+ * Custom get unmapped area might result in MREMAP_FIXED not
+ * being obeyed.
+ */
+ file = vma->vm_file;
+ if (file && !vma_is_shmem(vma) && !is_vm_hugetlb_page(vma)) {
+ const struct file_operations *fop = file->f_op;
+
+ if (fop->get_unmapped_area)
+ return false;
}
+ return true;
+}
+
+static int check_prep_vma(struct vma_remap_struct *vrm)
+{
+ struct vm_area_struct *vma = vrm->vma;
+ struct mm_struct *mm = current->mm;
+ unsigned long addr = vrm->addr;
+ unsigned long old_len, new_len, pgoff;
+
+ if (!vma)
+ return -EFAULT;
+
/* If mseal()'d, mremap() is prohibited. */
- if (!can_modify_vma(vma)) {
- ret = -EPERM;
- goto out;
- }
+ if (!can_modify_vma(vma))
+ return -EPERM;
/* Align to hugetlb page size, if required. */
- if (is_vm_hugetlb_page(vma) && !align_hugetlb(vrm)) {
- ret = -EINVAL;
- goto out;
- }
+ if (is_vm_hugetlb_page(vma) && !align_hugetlb(vrm))
+ return -EINVAL;
+ vrm_set_delta(vrm);
vrm->remap_type = vrm_remap_type(vrm);
+ /* For convenience, we set new_addr even if VMA won't move. */
+ if (!vrm_implies_new_addr(vrm))
+ vrm->new_addr = addr;
- /* Actually execute mremap. */
- ret = vrm_implies_new_addr(vrm) ? mremap_to(vrm) : mremap_at(vrm);
+ /* Below only meaningful if we expand or move a VMA. */
+ if (!vrm_will_map_new(vrm))
+ return 0;
-out:
- if (vrm->mmap_locked) {
- mmap_write_unlock(mm);
- vrm->mmap_locked = false;
+ old_len = vrm->old_len;
+ new_len = vrm->new_len;
- if (!offset_in_page(ret) && vrm->mlocked && vrm->new_len > vrm->old_len)
- mm_populate(vrm->new_addr + vrm->old_len, vrm->delta);
+ /*
+ * !old_len is a special case where an attempt is made to 'duplicate'
+ * a mapping. This makes no sense for private mappings as it will
+ * instead create a fresh/new mapping unrelated to the original. This
+ * is contrary to the basic idea of mremap which creates new mappings
+ * based on the original. There are no known use cases for this
+ * behavior. As a result, fail such attempts.
+ */
+ if (!old_len && !(vma->vm_flags & (VM_SHARED | VM_MAYSHARE))) {
+ pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap. This is not supported.\n",
+ current->comm, current->pid);
+ return -EINVAL;
}
- userfaultfd_unmap_complete(mm, vrm->uf_unmap_early);
- mremap_userfaultfd_complete(vrm->uf, vrm->addr, ret, vrm->old_len);
- userfaultfd_unmap_complete(mm, vrm->uf_unmap);
+ if ((vrm->flags & MREMAP_DONTUNMAP) &&
+ (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)))
+ return -EINVAL;
+
+ /*
+ * We permit crossing of boundaries for the range being unmapped due to
+ * a shrink.
+ */
+ if (vrm->remap_type == MREMAP_SHRINK)
+ old_len = new_len;
+
+ /*
+ * We can't remap across the end of VMAs, as another VMA may be
+ * adjacent:
+ *
+ * addr vma->vm_end
+ * |-----.----------|
+ * | . |
+ * |-----.----------|
+ * .<--------->xxx>
+ * old_len
+ *
+ * We also require that vma->vm_start <= addr < vma->vm_end.
+ */
+ if (old_len > vma->vm_end - addr)
+ return -EFAULT;
+
+ if (new_len == old_len)
+ return 0;
+
+ /* We are expanding and the VMA is mlock()'d so we need to populate. */
+ if (vma->vm_flags & VM_LOCKED)
+ vrm->populate_expand = true;
+
+ /* Need to be careful about a growing mapping */
+ pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
+ pgoff += vma->vm_pgoff;
+ if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
+ return -EINVAL;
+
+ if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
+ return -EFAULT;
+
+ if (!mlock_future_ok(mm, vma->vm_flags, vrm->delta))
+ return -EAGAIN;
+
+ if (!may_expand_vm(mm, vma->vm_flags, vrm->delta >> PAGE_SHIFT))
+ return -ENOMEM;
+
+ return 0;
+}
+
+/*
+ * Are the parameters passed to mremap() valid? If so return 0, otherwise return
+ * error.
+ */
+static unsigned long check_mremap_params(struct vma_remap_struct *vrm)
+
+{
+ unsigned long addr = vrm->addr;
+ unsigned long flags = vrm->flags;
+
+ /* Ensure no unexpected flag values. */
+ if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
+ return -EINVAL;
+
+ /* Start address must be page-aligned. */
+ if (offset_in_page(addr))
+ return -EINVAL;
+
+ /*
+ * We allow a zero old-len as a special case
+ * for DOS-emu "duplicate shm area" thing. But
+ * a zero new-len is nonsensical.
+ */
+ if (!vrm->new_len)
+ return -EINVAL;
+
+ /* Is the new length or address silly? */
+ if (vrm->new_len > TASK_SIZE ||
+ vrm->new_addr > TASK_SIZE - vrm->new_len)
+ return -EINVAL;
- return ret;
+ /* Remainder of checks are for cases with specific new_addr. */
+ if (!vrm_implies_new_addr(vrm))
+ return 0;
+
+ /* The new address must be page-aligned. */
+ if (offset_in_page(vrm->new_addr))
+ return -EINVAL;
+
+ /* A fixed address implies a move. */
+ if (!(flags & MREMAP_MAYMOVE))
+ return -EINVAL;
+
+ /* MREMAP_DONTUNMAP does not allow resizing in the process. */
+ if (flags & MREMAP_DONTUNMAP && vrm->old_len != vrm->new_len)
+ return -EINVAL;
+
+ /* Target VMA must not overlap source VMA. */
+ if (vrm_overlaps(vrm))
+ return -EINVAL;
+
+ /*
+ * move_vma() need us to stay 4 maps below the threshold, otherwise
+ * it will bail out at the very beginning.
+ * That is a problem if we have already unmaped the regions here
+ * (new_addr, and old_addr), because userspace will not know the
+ * state of the vma's after it gets -ENOMEM.
+ * So, to avoid such scenario we can pre-compute if the whole
+ * operation has high chances to success map-wise.
+ * Worst-scenario case is when both vma's (new_addr and old_addr) get
+ * split in 3 before unmapping it.
+ * That means 2 more maps (1 for each) to the ones we already hold.
+ * Check whether current map count plus 2 still leads us to 4 maps below
+ * the threshold, otherwise return -ENOMEM here to be more safe.
+ */
+ if ((current->mm->map_count + 2) >= sysctl_max_map_count - 3)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static unsigned long remap_move(struct vma_remap_struct *vrm)
+{
+ struct vm_area_struct *vma;
+ unsigned long start = vrm->addr;
+ unsigned long end = vrm->addr + vrm->old_len;
+ unsigned long new_addr = vrm->new_addr;
+ bool allowed = true, seen_vma = false;
+ unsigned long target_addr = new_addr;
+ unsigned long res = -EFAULT;
+ unsigned long last_end;
+ VMA_ITERATOR(vmi, current->mm, start);
+
+ /*
+ * When moving VMAs we allow for batched moves across multiple VMAs,
+ * with all VMAs in the input range [addr, addr + old_len) being moved
+ * (and split as necessary).
+ */
+ for_each_vma_range(vmi, vma, end) {
+ /* Account for start, end not aligned with VMA start, end. */
+ unsigned long addr = max(vma->vm_start, start);
+ unsigned long len = min(end, vma->vm_end) - addr;
+ unsigned long offset, res_vma;
+
+ if (!allowed)
+ return -EFAULT;
+
+ /* No gap permitted at the start of the range. */
+ if (!seen_vma && start < vma->vm_start)
+ return -EFAULT;
+
+ /*
+ * To sensibly move multiple VMAs, accounting for the fact that
+ * get_unmapped_area() may align even MAP_FIXED moves, we simply
+ * attempt to move such that the gaps between source VMAs remain
+ * consistent in destination VMAs, e.g.:
+ *
+ * X Y X Y
+ * <---> <-> <---> <->
+ * |-------| |-----| |-----| |-------| |-----| |-----|
+ * | A | | B | | C | ---> | A' | | B' | | C' |
+ * |-------| |-----| |-----| |-------| |-----| |-----|
+ * new_addr
+ *
+ * So we map B' at A'->vm_end + X, and C' at B'->vm_end + Y.
+ */
+ offset = seen_vma ? vma->vm_start - last_end : 0;
+ last_end = vma->vm_end;
+
+ vrm->vma = vma;
+ vrm->addr = addr;
+ vrm->new_addr = target_addr + offset;
+ vrm->old_len = vrm->new_len = len;
+
+ allowed = vma_multi_allowed(vma);
+ if (seen_vma && !allowed)
+ return -EFAULT;
+
+ res_vma = check_prep_vma(vrm);
+ if (!res_vma)
+ res_vma = mremap_to(vrm);
+ if (IS_ERR_VALUE(res_vma))
+ return res_vma;
+
+ if (!seen_vma) {
+ VM_WARN_ON_ONCE(allowed && res_vma != new_addr);
+ res = res_vma;
+ }
+
+ /* mmap lock is only dropped on shrink. */
+ VM_WARN_ON_ONCE(!vrm->mmap_locked);
+ /* This is a move, no expand should occur. */
+ VM_WARN_ON_ONCE(vrm->populate_expand);
+
+ if (vrm->vmi_needs_invalidate) {
+ vma_iter_invalidate(&vmi);
+ vrm->vmi_needs_invalidate = false;
+ }
+ seen_vma = true;
+ target_addr = res_vma + vrm->new_len;
+ }
+
+ return res;
+}
+
+static unsigned long do_mremap(struct vma_remap_struct *vrm)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long res;
+ bool failed;
+
+ vrm->old_len = PAGE_ALIGN(vrm->old_len);
+ vrm->new_len = PAGE_ALIGN(vrm->new_len);
+
+ res = check_mremap_params(vrm);
+ if (res)
+ return res;
+
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+ vrm->mmap_locked = true;
+
+ if (vrm_move_only(vrm)) {
+ res = remap_move(vrm);
+ } else {
+ vrm->vma = vma_lookup(current->mm, vrm->addr);
+ res = check_prep_vma(vrm);
+ if (res)
+ goto out;
+
+ /* Actually execute mremap. */
+ res = vrm_implies_new_addr(vrm) ? mremap_to(vrm) : mremap_at(vrm);
+ }
+
+out:
+ failed = IS_ERR_VALUE(res);
+
+ if (vrm->mmap_locked)
+ mmap_write_unlock(mm);
+
+ /* VMA mlock'd + was expanded, so populated expanded region. */
+ if (!failed && vrm->populate_expand)
+ mm_populate(vrm->new_addr + vrm->old_len, vrm->delta);
+
+ notify_uffd(vrm, failed);
+ return res;
}
/*