Diffstat (limited to 'mm/vma.c')
-rw-r--r--  mm/vma.c  229
1 file changed, 138 insertions, 91 deletions
diff --git a/mm/vma.c b/mm/vma.c
index abe0da33c844..fc90befd162f 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -34,7 +34,9 @@ struct mmap_state {
struct maple_tree mt_detach;
/* Determine if we can check KSM flags early in mmap() logic. */
- bool check_ksm_early;
+ bool check_ksm_early :1;
+ /* If we map a new VMA, hold the file rmap lock on the mapping. */
+ bool hold_file_rmap_lock :1;
};
#define MMAP_STATE(name, mm_, vmi_, addr_, len_, pgoff_, vm_flags_, file_) \
@@ -87,15 +89,7 @@ static inline bool is_mergeable_vma(struct vma_merge_struct *vmg, bool merge_nex
if (!mpol_equal(vmg->policy, vma_policy(vma)))
return false;
- /*
- * VM_SOFTDIRTY should not prevent from VMA merging, if we
- * match the flags but dirty bit -- the caller should mark
- * merged VMA as dirty. If dirty bit won't be excluded from
- * comparison, we increase pressure on the memory system forcing
- * the kernel to generate new VMAs when old one could be
- * extended instead.
- */
- if ((vma->vm_flags ^ vmg->vm_flags) & ~VM_SOFTDIRTY)
+ if ((vma->vm_flags ^ vmg->vm_flags) & ~VM_IGNORE_MERGE)
return false;
if (vma->vm_file != vmg->file)
return false;
@@ -109,7 +103,7 @@ static inline bool is_mergeable_vma(struct vma_merge_struct *vmg, bool merge_nex
static bool is_mergeable_anon_vma(struct vma_merge_struct *vmg, bool merge_next)
{
struct vm_area_struct *tgt = merge_next ? vmg->next : vmg->prev;
- struct vm_area_struct *src = vmg->middle; /* exisitng merge case. */
+ struct vm_area_struct *src = vmg->middle; /* existing merge case. */
struct anon_vma *tgt_anon = tgt->anon_vma;
struct anon_vma *src_anon = vmg->anon_vma;
@@ -481,8 +475,7 @@ void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
tlb_gather_mmu(&tlb, mm);
update_hiwater_rss(mm);
- unmap_vmas(&tlb, mas, vma, vma->vm_start, vma->vm_end, vma->vm_end,
- /* mm_wr_locked = */ true);
+ unmap_vmas(&tlb, mas, vma, vma->vm_start, vma->vm_end, vma->vm_end);
mas_set(mas, vma->vm_end);
free_pgtables(&tlb, mas, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
next ? next->vm_start : USER_PGTABLES_CEILING,
@@ -798,7 +791,7 @@ static bool can_merge_remove_vma(struct vm_area_struct *vma)
* Returns: The merged VMA if merge succeeds, or NULL otherwise.
*
* ASSUMPTIONS:
- * - The caller must assign the VMA to be modifed to @vmg->middle.
+ * - The caller must assign the VMA to be modified to @vmg->middle.
* - The caller must have set @vmg->prev to the previous VMA, if there is one.
* - The caller must not set @vmg->next, as we determine this.
* - The caller must hold a WRITE lock on the mm_struct->mmap_lock.
@@ -807,6 +800,7 @@ static bool can_merge_remove_vma(struct vm_area_struct *vma)
static __must_check struct vm_area_struct *vma_merge_existing_range(
struct vma_merge_struct *vmg)
{
+ vm_flags_t sticky_flags = vmg->vm_flags & VM_STICKY;
struct vm_area_struct *middle = vmg->middle;
struct vm_area_struct *prev = vmg->prev;
struct vm_area_struct *next;
@@ -899,11 +893,13 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
if (merge_right) {
vma_start_write(next);
vmg->target = next;
+ sticky_flags |= (next->vm_flags & VM_STICKY);
}
if (merge_left) {
vma_start_write(prev);
vmg->target = prev;
+ sticky_flags |= (prev->vm_flags & VM_STICKY);
}
if (merge_both) {
@@ -973,6 +969,7 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
if (err || commit_merge(vmg))
goto abort;
+ vm_flags_set(vmg->target, sticky_flags);
khugepaged_enter_vma(vmg->target, vmg->vm_flags);
vmg->state = VMA_MERGE_SUCCESS;
return vmg->target;
@@ -1123,6 +1120,10 @@ int vma_expand(struct vma_merge_struct *vmg)
bool remove_next = false;
struct vm_area_struct *target = vmg->target;
struct vm_area_struct *next = vmg->next;
+ vm_flags_t sticky_flags;
+
+ sticky_flags = vmg->vm_flags & VM_STICKY;
+ sticky_flags |= target->vm_flags & VM_STICKY;
VM_WARN_ON_VMG(!target, vmg);
@@ -1132,6 +1133,7 @@ int vma_expand(struct vma_merge_struct *vmg)
if (next && (target != next) && (vmg->end == next->vm_end)) {
int ret;
+ sticky_flags |= next->vm_flags & VM_STICKY;
remove_next = true;
/* This should already have been checked by this point. */
VM_WARN_ON_VMG(!can_merge_remove_vma(next), vmg);
@@ -1158,6 +1160,7 @@ int vma_expand(struct vma_merge_struct *vmg)
if (commit_merge(vmg))
goto nomem;
+ vm_flags_set(target, sticky_flags);
return 0;
nomem:
@@ -1226,7 +1229,7 @@ static inline void vms_clear_ptes(struct vma_munmap_struct *vms,
tlb_gather_mmu(&tlb, vms->vma->vm_mm);
update_hiwater_rss(vms->vma->vm_mm);
unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end,
- vms->vma_count, mm_wr_locked);
+ vms->vma_count);
mas_set(mas_detach, 1);
/* start and end may be different if there is no prev or next vma. */
@@ -1637,25 +1640,35 @@ static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg)
return vma;
}
-struct vm_area_struct *vma_modify_flags(
- struct vma_iterator *vmi, struct vm_area_struct *prev,
- struct vm_area_struct *vma, unsigned long start, unsigned long end,
- vm_flags_t vm_flags)
+struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi,
+ struct vm_area_struct *prev, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ vm_flags_t *vm_flags_ptr)
{
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
+ const vm_flags_t vm_flags = *vm_flags_ptr;
+ struct vm_area_struct *ret;
vmg.vm_flags = vm_flags;
- return vma_modify(&vmg);
+ ret = vma_modify(&vmg);
+ if (IS_ERR(ret))
+ return ret;
+
+ /*
+ * For a merge to succeed, the flags must match those
+ * requested. However, sticky flags may have been retained, so propagate
+ * them to the caller.
+ */
+ if (vmg.state == VMA_MERGE_SUCCESS)
+ *vm_flags_ptr = ret->vm_flags;
+ return ret;
}
-struct vm_area_struct
-*vma_modify_name(struct vma_iterator *vmi,
- struct vm_area_struct *prev,
- struct vm_area_struct *vma,
- unsigned long start,
- unsigned long end,
- struct anon_vma_name *new_name)
+struct vm_area_struct *vma_modify_name(struct vma_iterator *vmi,
+ struct vm_area_struct *prev, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct anon_vma_name *new_name)
{
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
@@ -1664,12 +1677,10 @@ struct vm_area_struct
return vma_modify(&vmg);
}
-struct vm_area_struct
-*vma_modify_policy(struct vma_iterator *vmi,
- struct vm_area_struct *prev,
- struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- struct mempolicy *new_pol)
+struct vm_area_struct *vma_modify_policy(struct vma_iterator *vmi,
+ struct vm_area_struct *prev, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct mempolicy *new_pol)
{
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
@@ -1678,14 +1689,10 @@ struct vm_area_struct
return vma_modify(&vmg);
}
-struct vm_area_struct
-*vma_modify_flags_uffd(struct vma_iterator *vmi,
- struct vm_area_struct *prev,
- struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- vm_flags_t vm_flags,
- struct vm_userfaultfd_ctx new_ctx,
- bool give_up_on_oom)
+struct vm_area_struct *vma_modify_flags_uffd(struct vma_iterator *vmi,
+ struct vm_area_struct *prev, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, vm_flags_t vm_flags,
+ struct vm_userfaultfd_ctx new_ctx, bool give_up_on_oom)
{
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
@@ -1754,24 +1761,7 @@ void unlink_file_vma_batch_final(struct unlink_vma_file_batch *vb)
unlink_file_vma_batch_process(vb);
}
-/*
- * Unlink a file-based vm structure from its interval tree, to hide
- * vma from rmap and vmtruncate before freeing its page tables.
- */
-void unlink_file_vma(struct vm_area_struct *vma)
-{
- struct file *file = vma->vm_file;
-
- if (file) {
- struct address_space *mapping = file->f_mapping;
-
- i_mmap_lock_write(mapping);
- __remove_shared_vm_struct(vma, mapping);
- i_mmap_unlock_write(mapping);
- }
-}
-
-void vma_link_file(struct vm_area_struct *vma)
+static void vma_link_file(struct vm_area_struct *vma, bool hold_rmap_lock)
{
struct file *file = vma->vm_file;
struct address_space *mapping;
@@ -1780,11 +1770,12 @@ void vma_link_file(struct vm_area_struct *vma)
mapping = file->f_mapping;
i_mmap_lock_write(mapping);
__vma_link_file(vma, mapping);
- i_mmap_unlock_write(mapping);
+ if (!hold_rmap_lock)
+ i_mmap_unlock_write(mapping);
}
}
-int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
+static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
{
VMA_ITERATOR(vmi, mm, 0);
@@ -1794,7 +1785,7 @@ int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
vma_start_write(vma);
vma_iter_store_new(&vmi, vma);
- vma_link_file(vma);
+ vma_link_file(vma, /* hold_rmap_lock= */false);
mm->map_count++;
validate_mm(mm);
return 0;
@@ -1917,7 +1908,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
return a->vm_end == b->vm_start &&
mpol_equal(vma_policy(a), vma_policy(b)) &&
a->vm_file == b->vm_file &&
- !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) &&
+ !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_IGNORE_MERGE)) &&
b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
}
@@ -2328,17 +2319,33 @@ static void update_ksm_flags(struct mmap_state *map)
map->vm_flags = ksm_vma_flags(map->mm, map->file, map->vm_flags);
}
+static void set_desc_from_map(struct vm_area_desc *desc,
+ const struct mmap_state *map)
+{
+ desc->start = map->addr;
+ desc->end = map->end;
+
+ desc->pgoff = map->pgoff;
+ desc->vm_file = map->file;
+ desc->vm_flags = map->vm_flags;
+ desc->page_prot = map->page_prot;
+}
+
/*
- * __mmap_prepare() - Prepare to gather any overlapping VMAs that need to be
+ * __mmap_setup() - Prepare to gather any overlapping VMAs that need to be
* unmapped once the map operation is completed, check limits, account mapping
* and clean up any pre-existing VMAs.
*
+ * As a result it sets up the @map and @desc objects.
+ *
* @map: Mapping state.
+ * @desc: VMA descriptor
* @uf: Userfaultfd context list.
*
* Returns: 0 on success, error code otherwise.
*/
-static int __mmap_prepare(struct mmap_state *map, struct list_head *uf)
+static int __mmap_setup(struct mmap_state *map, struct vm_area_desc *desc,
+ struct list_head *uf)
{
int error;
struct vma_iterator *vmi = map->vmi;
@@ -2395,6 +2402,7 @@ static int __mmap_prepare(struct mmap_state *map, struct list_head *uf)
*/
vms_clean_up_area(vms, &map->mas_detach);
+ set_desc_from_map(desc, map);
return 0;
}
@@ -2496,7 +2504,7 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
vma_start_write(vma);
vma_iter_store_new(vmi, vma);
map->mm->map_count++;
- vma_link_file(vma);
+ vma_link_file(vma, map->hold_file_rmap_lock);
/*
* vma_merge_new_range() calls khugepaged_enter_vma() too, the below
@@ -2551,11 +2559,23 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
* then new mapped in-place (which must be aimed as
* a completely new data area).
*/
- vm_flags_set(vma, VM_SOFTDIRTY);
+ if (pgtable_supports_soft_dirty())
+ vm_flags_set(vma, VM_SOFTDIRTY);
vma_set_page_prot(vma);
}
+static void call_action_prepare(struct mmap_state *map,
+ struct vm_area_desc *desc)
+{
+ struct mmap_action *action = &desc->action;
+
+ mmap_action_prepare(action, desc);
+
+ if (action->hide_from_rmap_until_complete)
+ map->hold_file_rmap_lock = true;
+}
+
/*
* Invoke the f_op->mmap_prepare() callback for a file-backed mapping that
* specifies it.
@@ -2567,34 +2587,26 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
*
* Returns 0 on success, or an error code otherwise.
*/
-static int call_mmap_prepare(struct mmap_state *map)
+static int call_mmap_prepare(struct mmap_state *map,
+ struct vm_area_desc *desc)
{
int err;
- struct vm_area_desc desc = {
- .mm = map->mm,
- .file = map->file,
- .start = map->addr,
- .end = map->end,
-
- .pgoff = map->pgoff,
- .vm_file = map->file,
- .vm_flags = map->vm_flags,
- .page_prot = map->page_prot,
- };
/* Invoke the hook. */
- err = vfs_mmap_prepare(map->file, &desc);
+ err = vfs_mmap_prepare(map->file, desc);
if (err)
return err;
+ call_action_prepare(map, desc);
+
/* Update fields permitted to be changed. */
- map->pgoff = desc.pgoff;
- map->file = desc.vm_file;
- map->vm_flags = desc.vm_flags;
- map->page_prot = desc.page_prot;
+ map->pgoff = desc->pgoff;
+ map->file = desc->vm_file;
+ map->vm_flags = desc->vm_flags;
+ map->page_prot = desc->page_prot;
/* User-defined fields. */
- map->vm_ops = desc.vm_ops;
- map->vm_private_data = desc.private_data;
+ map->vm_ops = desc->vm_ops;
+ map->vm_private_data = desc->private_data;
return 0;
}
@@ -2636,22 +2648,48 @@ static bool can_set_ksm_flags_early(struct mmap_state *map)
return false;
}
+static int call_action_complete(struct mmap_state *map,
+ struct vm_area_desc *desc,
+ struct vm_area_struct *vma)
+{
+ struct mmap_action *action = &desc->action;
+ int ret;
+
+ ret = mmap_action_complete(action, vma);
+
+ /* If we held the file rmap lock, we need to release it. */
+ if (map->hold_file_rmap_lock) {
+ struct file *file = vma->vm_file;
+
+ i_mmap_unlock_write(file->f_mapping);
+ }
+ return ret;
+}
+
static unsigned long __mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
struct list_head *uf)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma = NULL;
- int error;
bool have_mmap_prepare = file && file->f_op->mmap_prepare;
VMA_ITERATOR(vmi, mm, addr);
MMAP_STATE(map, mm, &vmi, addr, len, pgoff, vm_flags, file);
+ struct vm_area_desc desc = {
+ .mm = mm,
+ .file = file,
+ .action = {
+ .type = MMAP_NOTHING, /* Default to no further action. */
+ },
+ };
+ bool allocated_new = false;
+ int error;
map.check_ksm_early = can_set_ksm_flags_early(&map);
- error = __mmap_prepare(&map, uf);
+ error = __mmap_setup(&map, &desc, uf);
if (!error && have_mmap_prepare)
- error = call_mmap_prepare(&map);
+ error = call_mmap_prepare(&map, &desc);
if (error)
goto abort_munmap;
@@ -2670,6 +2708,7 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
error = __mmap_new_vma(&map, &vma);
if (error)
goto unacct_error;
+ allocated_new = true;
}
if (have_mmap_prepare)
@@ -2677,9 +2716,16 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
__mmap_complete(&map, vma);
+ if (have_mmap_prepare && allocated_new) {
+ error = call_action_complete(&map, &desc, vma);
+
+ if (error)
+ return error;
+ }
+
return addr;
- /* Accounting was done by __mmap_prepare(). */
+ /* Accounting was done by __mmap_setup(). */
unacct_error:
if (map.charged)
vm_unacct_memory(map.charged);
@@ -2819,7 +2865,8 @@ out:
mm->data_vm += len >> PAGE_SHIFT;
if (vm_flags & VM_LOCKED)
mm->locked_vm += (len >> PAGE_SHIFT);
- vm_flags_set(vma, VM_SOFTDIRTY);
+ if (pgtable_supports_soft_dirty())
+ vm_flags_set(vma, VM_SOFTDIRTY);
return 0;
mas_store_fail: