Diffstat (limited to 'mm/vma.c')
| -rw-r--r-- | mm/vma.c | 229 |
1 file changed, 138 insertions, 91 deletions
@@ -34,7 +34,9 @@ struct mmap_state {
 	struct maple_tree mt_detach;
 
 	/* Determine if we can check KSM flags early in mmap() logic. */
-	bool check_ksm_early;
+	bool check_ksm_early :1;
+	/* If we map new, hold the file rmap lock on mapping. */
+	bool hold_file_rmap_lock :1;
 };
 
 #define MMAP_STATE(name, mm_, vmi_, addr_, len_, pgoff_, vm_flags_, file_) \
@@ -87,15 +89,7 @@ static inline bool is_mergeable_vma(struct vma_merge_struct *vmg, bool merge_nex
 
 	if (!mpol_equal(vmg->policy, vma_policy(vma)))
 		return false;
-	/*
-	 * VM_SOFTDIRTY should not prevent from VMA merging, if we
-	 * match the flags but dirty bit -- the caller should mark
-	 * merged VMA as dirty. If dirty bit won't be excluded from
-	 * comparison, we increase pressure on the memory system forcing
-	 * the kernel to generate new VMAs when old one could be
-	 * extended instead.
-	 */
-	if ((vma->vm_flags ^ vmg->vm_flags) & ~VM_SOFTDIRTY)
+	if ((vma->vm_flags ^ vmg->vm_flags) & ~VM_IGNORE_MERGE)
 		return false;
 	if (vma->vm_file != vmg->file)
 		return false;
@@ -109,7 +103,7 @@ static inline bool is_mergeable_vma(struct vma_merge_struct *vmg, bool merge_nex
 static bool is_mergeable_anon_vma(struct vma_merge_struct *vmg, bool merge_next)
 {
 	struct vm_area_struct *tgt = merge_next ? vmg->next : vmg->prev;
-	struct vm_area_struct *src = vmg->middle; /* exisitng merge case. */
+	struct vm_area_struct *src = vmg->middle; /* existing merge case. */
 	struct anon_vma *tgt_anon = tgt->anon_vma;
 	struct anon_vma *src_anon = vmg->anon_vma;
 
@@ -481,8 +475,7 @@ void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
 
 	tlb_gather_mmu(&tlb, mm);
 	update_hiwater_rss(mm);
-	unmap_vmas(&tlb, mas, vma, vma->vm_start, vma->vm_end, vma->vm_end,
-		   /* mm_wr_locked = */ true);
+	unmap_vmas(&tlb, mas, vma, vma->vm_start, vma->vm_end, vma->vm_end);
 	mas_set(mas, vma->vm_end);
 	free_pgtables(&tlb, mas, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
 		      next ? next->vm_start : USER_PGTABLES_CEILING,
@@ -798,7 +791,7 @@ static bool can_merge_remove_vma(struct vm_area_struct *vma)
  * Returns: The merged VMA if merge succeeds, or NULL otherwise.
  *
  * ASSUMPTIONS:
- * - The caller must assign the VMA to be modifed to @vmg->middle.
+ * - The caller must assign the VMA to be modified to @vmg->middle.
  * - The caller must have set @vmg->prev to the previous VMA, if there is one.
  * - The caller must not set @vmg->next, as we determine this.
  * - The caller must hold a WRITE lock on the mm_struct->mmap_lock.
@@ -807,6 +800,7 @@ static bool can_merge_remove_vma(struct vm_area_struct *vma)
 static __must_check struct vm_area_struct *vma_merge_existing_range(
 		struct vma_merge_struct *vmg)
 {
+	vm_flags_t sticky_flags = vmg->vm_flags & VM_STICKY;
 	struct vm_area_struct *middle = vmg->middle;
 	struct vm_area_struct *prev = vmg->prev;
 	struct vm_area_struct *next;
@@ -899,11 +893,13 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
 	if (merge_right) {
 		vma_start_write(next);
 		vmg->target = next;
+		sticky_flags |= (next->vm_flags & VM_STICKY);
 	}
 
 	if (merge_left) {
 		vma_start_write(prev);
 		vmg->target = prev;
+		sticky_flags |= (prev->vm_flags & VM_STICKY);
 	}
 
 	if (merge_both) {
@@ -973,6 +969,7 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
 	if (err || commit_merge(vmg))
 		goto abort;
 
+	vm_flags_set(vmg->target, sticky_flags);
 	khugepaged_enter_vma(vmg->target, vmg->vm_flags);
 	vmg->state = VMA_MERGE_SUCCESS;
 	return vmg->target;
@@ -1123,6 +1120,10 @@ int vma_expand(struct vma_merge_struct *vmg)
 	bool remove_next = false;
 	struct vm_area_struct *target = vmg->target;
 	struct vm_area_struct *next = vmg->next;
+	vm_flags_t sticky_flags;
+
+	sticky_flags = vmg->vm_flags & VM_STICKY;
+	sticky_flags |= target->vm_flags & VM_STICKY;
 
 	VM_WARN_ON_VMG(!target, vmg);
 
@@ -1132,6 +1133,7 @@ int vma_expand(struct vma_merge_struct *vmg)
 	if (next && (target != next) && (vmg->end == next->vm_end)) {
 		int ret;
 
+		sticky_flags |= next->vm_flags & VM_STICKY;
 		remove_next = true;
 		/* This should already have been checked by this point. */
 		VM_WARN_ON_VMG(!can_merge_remove_vma(next), vmg);
@@ -1158,6 +1160,7 @@ int vma_expand(struct vma_merge_struct *vmg)
 	if (commit_merge(vmg))
 		goto nomem;
 
+	vm_flags_set(target, sticky_flags);
 	return 0;
 
 nomem:
@@ -1226,7 +1229,7 @@ static inline void vms_clear_ptes(struct vma_munmap_struct *vms,
 	tlb_gather_mmu(&tlb, vms->vma->vm_mm);
 	update_hiwater_rss(vms->vma->vm_mm);
 	unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end,
-		   vms->vma_count, mm_wr_locked);
+		   vms->vma_count);
 	mas_set(mas_detach, 1);
 
 	/* start and end may be different if there is no prev or next vma. */
@@ -1637,25 +1640,35 @@ static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg)
 	return vma;
 }
 
-struct vm_area_struct *vma_modify_flags(
-	struct vma_iterator *vmi, struct vm_area_struct *prev,
-	struct vm_area_struct *vma, unsigned long start, unsigned long end,
-	vm_flags_t vm_flags)
+struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi,
+		struct vm_area_struct *prev, struct vm_area_struct *vma,
+		unsigned long start, unsigned long end,
+		vm_flags_t *vm_flags_ptr)
 {
 	VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
+	const vm_flags_t vm_flags = *vm_flags_ptr;
+	struct vm_area_struct *ret;
 
 	vmg.vm_flags = vm_flags;
 
-	return vma_modify(&vmg);
+	ret = vma_modify(&vmg);
+	if (IS_ERR(ret))
+		return ret;
+
+	/*
+	 * For a merge to succeed, the flags must match those
+	 * requested. However, sticky flags may have been retained, so propagate
+	 * them to the caller.
+	 */
+	if (vmg.state == VMA_MERGE_SUCCESS)
+		*vm_flags_ptr = ret->vm_flags;
+	return ret;
 }
 
-struct vm_area_struct
-*vma_modify_name(struct vma_iterator *vmi,
-		 struct vm_area_struct *prev,
-		 struct vm_area_struct *vma,
-		 unsigned long start,
-		 unsigned long end,
-		 struct anon_vma_name *new_name)
+struct vm_area_struct *vma_modify_name(struct vma_iterator *vmi,
+		struct vm_area_struct *prev, struct vm_area_struct *vma,
+		unsigned long start, unsigned long end,
+		struct anon_vma_name *new_name)
 {
 	VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
 
@@ -1664,12 +1677,10 @@ struct vm_area_struct
 	return vma_modify(&vmg);
 }
 
-struct vm_area_struct
-*vma_modify_policy(struct vma_iterator *vmi,
-		   struct vm_area_struct *prev,
-		   struct vm_area_struct *vma,
-		   unsigned long start, unsigned long end,
-		   struct mempolicy *new_pol)
+struct vm_area_struct *vma_modify_policy(struct vma_iterator *vmi,
+		struct vm_area_struct *prev, struct vm_area_struct *vma,
+		unsigned long start, unsigned long end,
+		struct mempolicy *new_pol)
 {
 	VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
 
@@ -1678,14 +1689,10 @@ struct vm_area_struct
 	return vma_modify(&vmg);
 }
 
-struct vm_area_struct
-*vma_modify_flags_uffd(struct vma_iterator *vmi,
-		       struct vm_area_struct *prev,
-		       struct vm_area_struct *vma,
-		       unsigned long start, unsigned long end,
-		       vm_flags_t vm_flags,
-		       struct vm_userfaultfd_ctx new_ctx,
-		       bool give_up_on_oom)
+struct vm_area_struct *vma_modify_flags_uffd(struct vma_iterator *vmi,
+		struct vm_area_struct *prev, struct vm_area_struct *vma,
+		unsigned long start, unsigned long end, vm_flags_t vm_flags,
+		struct vm_userfaultfd_ctx new_ctx, bool give_up_on_oom)
 {
 	VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
 
@@ -1754,24 +1761,7 @@ void unlink_file_vma_batch_final(struct unlink_vma_file_batch *vb)
 	unlink_file_vma_batch_process(vb);
 }
 
-/*
- * Unlink a file-based vm structure from its interval tree, to hide
- * vma from rmap and vmtruncate before freeing its page tables.
- */
-void unlink_file_vma(struct vm_area_struct *vma)
-{
-	struct file *file = vma->vm_file;
-
-	if (file) {
-		struct address_space *mapping = file->f_mapping;
-
-		i_mmap_lock_write(mapping);
-		__remove_shared_vm_struct(vma, mapping);
-		i_mmap_unlock_write(mapping);
-	}
-}
-
-void vma_link_file(struct vm_area_struct *vma)
+static void vma_link_file(struct vm_area_struct *vma, bool hold_rmap_lock)
 {
 	struct file *file = vma->vm_file;
 	struct address_space *mapping;
@@ -1780,11 +1770,12 @@ void vma_link_file(struct vm_area_struct *vma)
 		mapping = file->f_mapping;
 		i_mmap_lock_write(mapping);
 		__vma_link_file(vma, mapping);
-		i_mmap_unlock_write(mapping);
+		if (!hold_rmap_lock)
+			i_mmap_unlock_write(mapping);
 	}
 }
 
-int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
+static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
 {
 	VMA_ITERATOR(vmi, mm, 0);
 
@@ -1794,7 +1785,7 @@ int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
 
 	vma_start_write(vma);
 	vma_iter_store_new(&vmi, vma);
-	vma_link_file(vma);
+	vma_link_file(vma, /* hold_rmap_lock= */false);
 	mm->map_count++;
 	validate_mm(mm);
 	return 0;
@@ -1917,7 +1908,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
 	return a->vm_end == b->vm_start &&
 		mpol_equal(vma_policy(a), vma_policy(b)) &&
 		a->vm_file == b->vm_file &&
-		!((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) &&
+		!((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_IGNORE_MERGE)) &&
 		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
 }
 
@@ -2328,17 +2319,33 @@ static void update_ksm_flags(struct mmap_state *map)
 	map->vm_flags = ksm_vma_flags(map->mm, map->file, map->vm_flags);
 }
 
+static void set_desc_from_map(struct vm_area_desc *desc,
+		const struct mmap_state *map)
+{
+	desc->start = map->addr;
+	desc->end = map->end;
+
+	desc->pgoff = map->pgoff;
+	desc->vm_file = map->file;
+	desc->vm_flags = map->vm_flags;
+	desc->page_prot = map->page_prot;
+}
+
 /*
- * __mmap_prepare() - Prepare to gather any overlapping VMAs that need to be
+ * __mmap_setup() - Prepare to gather any overlapping VMAs that need to be
  * unmapped once the map operation is completed, check limits, account mapping
  * and clean up any pre-existing VMAs.
  *
+ * As a result it sets up the @map and @desc objects.
+ *
  * @map: Mapping state.
+ * @desc: VMA descriptor
  * @uf: Userfaultfd context list.
  *
  * Returns: 0 on success, error code otherwise.
  */
-static int __mmap_prepare(struct mmap_state *map, struct list_head *uf)
+static int __mmap_setup(struct mmap_state *map, struct vm_area_desc *desc,
+		struct list_head *uf)
 {
 	int error;
 	struct vma_iterator *vmi = map->vmi;
@@ -2395,6 +2402,7 @@ static int __mmap_prepare(struct mmap_state *map, struct list_head *uf)
 	 */
 	vms_clean_up_area(vms, &map->mas_detach);
 
+	set_desc_from_map(desc, map);
 	return 0;
 }
 
@@ -2496,7 +2504,7 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
 	vma_start_write(vma);
 	vma_iter_store_new(vmi, vma);
 	map->mm->map_count++;
-	vma_link_file(vma);
+	vma_link_file(vma, map->hold_file_rmap_lock);
 
 	/*
 	 * vma_merge_new_range() calls khugepaged_enter_vma() too, the below
@@ -2551,11 +2559,23 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
 	 * then new mapped in-place (which must be aimed as
 	 * a completely new data area).
	 */
-	vm_flags_set(vma, VM_SOFTDIRTY);
+	if (pgtable_supports_soft_dirty())
+		vm_flags_set(vma, VM_SOFTDIRTY);
 	vma_set_page_prot(vma);
 }
 
+static void call_action_prepare(struct mmap_state *map,
+		struct vm_area_desc *desc)
+{
+	struct mmap_action *action = &desc->action;
+
+	mmap_action_prepare(action, desc);
+
+	if (action->hide_from_rmap_until_complete)
+		map->hold_file_rmap_lock = true;
+}
+
 /*
  * Invoke the f_op->mmap_prepare() callback for a file-backed mapping that
  * specifies it.
@@ -2567,34 +2587,26 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
  *
  * Returns 0 on success, or an error code otherwise.
  */
-static int call_mmap_prepare(struct mmap_state *map)
+static int call_mmap_prepare(struct mmap_state *map,
+		struct vm_area_desc *desc)
 {
 	int err;
-	struct vm_area_desc desc = {
-		.mm = map->mm,
-		.file = map->file,
-		.start = map->addr,
-		.end = map->end,
-
-		.pgoff = map->pgoff,
-		.vm_file = map->file,
-		.vm_flags = map->vm_flags,
-		.page_prot = map->page_prot,
-	};
 
 	/* Invoke the hook. */
-	err = vfs_mmap_prepare(map->file, &desc);
+	err = vfs_mmap_prepare(map->file, desc);
 	if (err)
 		return err;
 
+	call_action_prepare(map, desc);
+
 	/* Update fields permitted to be changed. */
-	map->pgoff = desc.pgoff;
-	map->file = desc.vm_file;
-	map->vm_flags = desc.vm_flags;
-	map->page_prot = desc.page_prot;
+	map->pgoff = desc->pgoff;
+	map->file = desc->vm_file;
+	map->vm_flags = desc->vm_flags;
+	map->page_prot = desc->page_prot;
 
 	/* User-defined fields. */
-	map->vm_ops = desc.vm_ops;
-	map->vm_private_data = desc.private_data;
+	map->vm_ops = desc->vm_ops;
+	map->vm_private_data = desc->private_data;
 
 	return 0;
 }
@@ -2636,22 +2648,48 @@ static bool can_set_ksm_flags_early(struct mmap_state *map)
 	return false;
 }
 
+static int call_action_complete(struct mmap_state *map,
+		struct vm_area_desc *desc,
+		struct vm_area_struct *vma)
+{
+	struct mmap_action *action = &desc->action;
+	int ret;
+
+	ret = mmap_action_complete(action, vma);
+
+	/* If we held the file rmap we need to release it. */
+	if (map->hold_file_rmap_lock) {
+		struct file *file = vma->vm_file;
+
+		i_mmap_unlock_write(file->f_mapping);
+	}
+	return ret;
+}
+
 static unsigned long __mmap_region(struct file *file, unsigned long addr,
 		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
 		struct list_head *uf)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma = NULL;
-	int error;
 	bool have_mmap_prepare = file && file->f_op->mmap_prepare;
 	VMA_ITERATOR(vmi, mm, addr);
 	MMAP_STATE(map, mm, &vmi, addr, len, pgoff, vm_flags, file);
+	struct vm_area_desc desc = {
+		.mm = mm,
+		.file = file,
+		.action = {
+			.type = MMAP_NOTHING, /* Default to no further action. */
+		},
+	};
+	bool allocated_new = false;
+	int error;
 
 	map.check_ksm_early = can_set_ksm_flags_early(&map);
 
-	error = __mmap_prepare(&map, uf);
+	error = __mmap_setup(&map, &desc, uf);
 	if (!error && have_mmap_prepare)
-		error = call_mmap_prepare(&map);
+		error = call_mmap_prepare(&map, &desc);
 	if (error)
 		goto abort_munmap;
 
@@ -2670,6 +2708,7 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
 		error = __mmap_new_vma(&map, &vma);
 		if (error)
 			goto unacct_error;
+		allocated_new = true;
 	}
 
 	if (have_mmap_prepare)
@@ -2677,9 +2716,16 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
 
 	__mmap_complete(&map, vma);
 
+	if (have_mmap_prepare && allocated_new) {
+		error = call_action_complete(&map, &desc, vma);
+
+		if (error)
+			return error;
+	}
+
 	return addr;
 
-	/* Accounting was done by __mmap_prepare(). */
+	/* Accounting was done by __mmap_setup(). */
 unacct_error:
	if (map.charged)
		vm_unacct_memory(map.charged);
@@ -2819,7 +2865,8 @@ out:
 	mm->data_vm += len >> PAGE_SHIFT;
 	if (vm_flags & VM_LOCKED)
 		mm->locked_vm += (len >> PAGE_SHIFT);
-	vm_flags_set(vma, VM_SOFTDIRTY);
+	if (pgtable_supports_soft_dirty())
+		vm_flags_set(vma, VM_SOFTDIRTY);
 	return 0;
 
 mas_store_fail:
