From fd892593d44d8b649caf30a67f0c7696d976d901 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Mon, 24 Jul 2023 14:31:45 -0400 Subject: mm: change do_vmi_align_munmap() tracking of VMAs to remove The majority of the calls to munmap a vm range is within a single vma. The maple tree is able to store a single entry at 0, with a size of 1 as a pointer and avoid any allocations. Change do_vmi_align_munmap() to store the VMAs being munmap()'ed into a tree indexed by the count. This will leverage the ability to store the first entry without a node allocation. Storing the entries into a tree by the count and not the vma start and end means changing the functions which iterate over the entries. Update unmap_vmas() and free_pgtables() to take a maple state and a tree end address to support this functionality. Passing through the same maple state to unmap_vmas() and free_pgtables() means the state needs to be reset between calls. This happens in the static unmap_region() and exit_mmap(). Link: https://lkml.kernel.org/r/20230724183157.3939892-4-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Cc: Peng Zhang Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- mm/mmap.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index 4a9466b76648..5212a0b66b8f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -76,10 +76,10 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS; static bool ignore_rlimit_data; core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644); -static void unmap_region(struct mm_struct *mm, struct maple_tree *mt, +static void unmap_region(struct mm_struct *mm, struct ma_state *mas, struct vm_area_struct *vma, struct vm_area_struct *prev, struct vm_area_struct *next, unsigned long start, - unsigned long end, bool mm_wr_locked); + unsigned long end, unsigned long tree_end, bool mm_wr_locked); static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags) { @@ -2293,18 +2293,20 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas) * * Called with the mm semaphore held. */ -static void unmap_region(struct mm_struct *mm, struct maple_tree *mt, +static void unmap_region(struct mm_struct *mm, struct ma_state *mas, struct vm_area_struct *vma, struct vm_area_struct *prev, - struct vm_area_struct *next, - unsigned long start, unsigned long end, bool mm_wr_locked) + struct vm_area_struct *next, unsigned long start, + unsigned long end, unsigned long tree_end, bool mm_wr_locked) { struct mmu_gather tlb; + unsigned long mt_start = mas->index; lru_add_drain(); tlb_gather_mmu(&tlb, mm); update_hiwater_rss(mm); - unmap_vmas(&tlb, mt, vma, start, end, mm_wr_locked); - free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, + unmap_vmas(&tlb, mas, vma, start, end, tree_end, mm_wr_locked); + mas_set(mas, mt_start); + free_pgtables(&tlb, mas, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, next ? next->vm_start : USER_PGTABLES_CEILING, mm_wr_locked); tlb_finish_mmu(&tlb); @@ -2472,7 +2474,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, goto end_split_failed; } vma_start_write(next); - mas_set_range(&mas_detach, next->vm_start, next->vm_end - 1); + mas_set(&mas_detach, count); error = mas_store_gfp(&mas_detach, next, GFP_KERNEL); if (error) goto munmap_gather_failed; @@ -2511,17 +2513,17 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, #if defined(CONFIG_DEBUG_VM_MAPLE_TREE) /* Make sure no VMAs are about to be lost. */ { - MA_STATE(test, &mt_detach, start, end - 1); + MA_STATE(test, &mt_detach, 0, 0); struct vm_area_struct *vma_mas, *vma_test; int test_count = 0; vma_iter_set(vmi, start); rcu_read_lock(); - vma_test = mas_find(&test, end - 1); + vma_test = mas_find(&test, count - 1); for_each_vma_range(*vmi, vma_mas, end) { BUG_ON(vma_mas != vma_test); test_count++; - vma_test = mas_next(&test, end - 1); + vma_test = mas_next(&test, count - 1); } rcu_read_unlock(); BUG_ON(count != test_count); @@ -2542,9 +2544,11 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, * We can free page tables without write-locking mmap_lock because VMAs * were isolated before we downgraded mmap_lock. */ - unmap_region(mm, &mt_detach, vma, prev, next, start, end, !unlock); + mas_set(&mas_detach, 1); + unmap_region(mm, &mas_detach, vma, prev, next, start, end, count, + !unlock); /* Statistics and freeing VMAs */ - mas_set(&mas_detach, start); + mas_set(&mas_detach, 0); remove_mt(mm, &mas_detach); validate_mm(mm); if (unlock) @@ -2864,9 +2868,10 @@ unmap_and_free_vma: fput(vma->vm_file); vma->vm_file = NULL; + vma_iter_set(&vmi, vma->vm_end); /* Undo any partial mapping done by a device driver. */ - unmap_region(mm, &mm->mm_mt, vma, prev, next, vma->vm_start, - vma->vm_end, true); + unmap_region(mm, &vmi.mas, vma, prev, next, vma->vm_start, + vma->vm_end, vma->vm_end, true); } if (file && (vm_flags & VM_SHARED)) mapping_unmap_writable(file->f_mapping); @@ -3185,7 +3190,7 @@ void exit_mmap(struct mm_struct *mm) tlb_gather_mmu_fullmm(&tlb, mm); /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */ - unmap_vmas(&tlb, &mm->mm_mt, vma, 0, ULONG_MAX, false); + unmap_vmas(&tlb, &mas, vma, 0, ULONG_MAX, ULONG_MAX, false); mmap_read_unlock(mm); /* @@ -3195,7 +3200,8 @@ void exit_mmap(struct mm_struct *mm) set_bit(MMF_OOM_SKIP, &mm->flags); mmap_write_lock(mm); mt_clear_in_rcu(&mm->mm_mt); - free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS, + mas_set(&mas, vma->vm_end); + free_pgtables(&tlb, &mas, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING, true); tlb_finish_mmu(&tlb); @@ -3204,6 +3210,7 @@ void exit_mmap(struct mm_struct *mm) * enabled, without holding any MM locks besides the unreachable * mmap_write_lock. */ + mas_set(&mas, vma->vm_end); do { if (vma->vm_flags & VM_ACCOUNT) nr_accounted += vma_pages(vma); -- cgit