Diffstat (limited to 'mm/mremap.c')
-rw-r--r--  mm/mremap.c  196
1 file changed, 113 insertions(+), 83 deletions(-)
diff --git a/mm/mremap.c b/mm/mremap.c
index 3a2ac167e876..0865387531ed 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -108,8 +108,7 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
         return pmd;
 }
-static pud_t *alloc_new_pud(struct mm_struct *mm, struct vm_area_struct *vma,
-                            unsigned long addr)
+static pud_t *alloc_new_pud(struct mm_struct *mm, unsigned long addr)
 {
         pgd_t *pgd;
         p4d_t *p4d;
@@ -122,13 +121,12 @@ static pud_t *alloc_new_pud(struct mm_struct *mm, struct vm_area_struct *vma,
         return pud_alloc(mm, p4d, addr);
 }
-static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
-                            unsigned long addr)
+static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr)
 {
         pud_t *pud;
         pmd_t *pmd;
-        pud = alloc_new_pud(mm, vma, addr);
+        pud = alloc_new_pud(mm, addr);
         if (!pud)
                 return NULL;
@@ -172,17 +170,19 @@ static pte_t move_soft_dirty_pte(pte_t pte)
         return pte;
 }
-static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
-                unsigned long old_addr, unsigned long old_end,
-                struct vm_area_struct *new_vma, pmd_t *new_pmd,
-                unsigned long new_addr, bool need_rmap_locks)
+static int move_ptes(struct pagetable_move_control *pmc,
+                unsigned long extent, pmd_t *old_pmd, pmd_t *new_pmd)
 {
+        struct vm_area_struct *vma = pmc->old;
         bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
         struct mm_struct *mm = vma->vm_mm;
         pte_t *old_pte, *new_pte, pte;
         pmd_t dummy_pmdval;
         spinlock_t *old_ptl, *new_ptl;
         bool force_flush = false;
+        unsigned long old_addr = pmc->old_addr;
+        unsigned long new_addr = pmc->new_addr;
+        unsigned long old_end = old_addr + extent;
         unsigned long len = old_end - old_addr;
         int err = 0;
@@ -204,7 +204,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
          * serialize access to individual ptes, but only rmap traversal
          * order guarantees that we won't miss both the old and new ptes).
          */
-        if (need_rmap_locks)
+        if (pmc->need_rmap_locks)
                 take_rmap_locks(vma);
         /*
@@ -278,7 +278,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
                 pte_unmap(new_pte - 1);
         pte_unmap_unlock(old_pte - 1, old_ptl);
 out:
-        if (need_rmap_locks)
+        if (pmc->need_rmap_locks)
                 drop_rmap_locks(vma);
         return err;
 }
@@ -293,10 +293,11 @@ static inline bool arch_supports_page_table_move(void)
 #endif
 #ifdef CONFIG_HAVE_MOVE_PMD
-static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
-                unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
+static bool move_normal_pmd(struct pagetable_move_control *pmc,
+                pmd_t *old_pmd, pmd_t *new_pmd)
 {
         spinlock_t *old_ptl, *new_ptl;
+        struct vm_area_struct *vma = pmc->old;
         struct mm_struct *mm = vma->vm_mm;
         bool res = false;
         pmd_t pmd;
@@ -342,7 +343,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
          * We don't have to worry about the ordering of src and dst
          * ptlocks because exclusive mmap_lock prevents deadlock.
          */
-        old_ptl = pmd_lock(vma->vm_mm, old_pmd);
+        old_ptl = pmd_lock(mm, old_pmd);
         new_ptl = pmd_lockptr(mm, new_pmd);
         if (new_ptl != old_ptl)
                 spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
@@ -359,7 +360,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
         VM_BUG_ON(!pmd_none(*new_pmd));
         pmd_populate(mm, new_pmd, pmd_pgtable(pmd));
-        flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
+        flush_tlb_range(vma, pmc->old_addr, pmc->old_addr + PMD_SIZE);
 out_unlock:
         if (new_ptl != old_ptl)
                 spin_unlock(new_ptl);
@@ -368,19 +369,19 @@ out_unlock:
         return res;
 }
 #else
-static inline bool move_normal_pmd(struct vm_area_struct *vma,
-                unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd,
-                pmd_t *new_pmd)
+static inline bool move_normal_pmd(struct pagetable_move_control *pmc,
+                pmd_t *old_pmd, pmd_t *new_pmd)
 {
         return false;
 }
 #endif
 #if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_HAVE_MOVE_PUD)
-static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
-                unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
+static bool move_normal_pud(struct pagetable_move_control *pmc,
+                pud_t *old_pud, pud_t *new_pud)
 {
         spinlock_t *old_ptl, *new_ptl;
+        struct vm_area_struct *vma = pmc->old;
         struct mm_struct *mm = vma->vm_mm;
         pud_t pud;
@@ -406,7 +407,7 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
          * We don't have to worry about the ordering of src and dst
          * ptlocks because exclusive mmap_lock prevents deadlock.
          */
-        old_ptl = pud_lock(vma->vm_mm, old_pud);
+        old_ptl = pud_lock(mm, old_pud);
         new_ptl = pud_lockptr(mm, new_pud);
         if (new_ptl != old_ptl)
                 spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
@@ -418,7 +419,7 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
         VM_BUG_ON(!pud_none(*new_pud));
         pud_populate(mm, new_pud, pud_pgtable(pud));
-        flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE);
+        flush_tlb_range(vma, pmc->old_addr, pmc->old_addr + PUD_SIZE);
         if (new_ptl != old_ptl)
                 spin_unlock(new_ptl);
         spin_unlock(old_ptl);
@@ -426,19 +427,19 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
         return true;
 }
 #else
-static inline bool move_normal_pud(struct vm_area_struct *vma,
-                unsigned long old_addr, unsigned long new_addr, pud_t *old_pud,
-                pud_t *new_pud)
+static inline bool move_normal_pud(struct pagetable_move_control *pmc,
+                pud_t *old_pud, pud_t *new_pud)
 {
         return false;
 }
 #endif
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
-static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
-                          unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
+static bool move_huge_pud(struct pagetable_move_control *pmc,
+                          pud_t *old_pud, pud_t *new_pud)
 {
         spinlock_t *old_ptl, *new_ptl;
+        struct vm_area_struct *vma = pmc->old;
         struct mm_struct *mm = vma->vm_mm;
         pud_t pud;
@@ -453,7 +454,7 @@ static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
          * We don't have to worry about the ordering of src and dst
          * ptlocks because exclusive mmap_lock prevents deadlock.
          */
-        old_ptl = pud_lock(vma->vm_mm, old_pud);
+        old_ptl = pud_lock(mm, old_pud);
         new_ptl = pud_lockptr(mm, new_pud);
         if (new_ptl != old_ptl)
                 spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
@@ -466,8 +467,8 @@ static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
         /* Set the new pud */
         /* mark soft_ditry when we add pud level soft dirty support */
-        set_pud_at(mm, new_addr, new_pud, pud);
-        flush_pud_tlb_range(vma, old_addr, old_addr + HPAGE_PUD_SIZE);
+        set_pud_at(mm, pmc->new_addr, new_pud, pud);
+        flush_pud_tlb_range(vma, pmc->old_addr, pmc->old_addr + HPAGE_PUD_SIZE);
         if (new_ptl != old_ptl)
                 spin_unlock(new_ptl);
         spin_unlock(old_ptl);
@@ -475,8 +476,9 @@ static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
         return true;
 }
 #else
-static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
-                          unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
+static bool move_huge_pud(struct pagetable_move_control *pmc,
+                          pud_t *old_pud, pud_t *new_pud)
+
 {
         WARN_ON_ONCE(1);
         return false;
@@ -497,10 +499,12 @@ enum pgt_entry {
  * destination pgt_entry.
  */
 static __always_inline unsigned long get_extent(enum pgt_entry entry,
-                        unsigned long old_addr, unsigned long old_end,
-                        unsigned long new_addr)
+                        struct pagetable_move_control *pmc)
 {
         unsigned long next, extent, mask, size;
+        unsigned long old_addr = pmc->old_addr;
+        unsigned long old_end = pmc->old_end;
+        unsigned long new_addr = pmc->new_addr;
         switch (entry) {
         case HPAGE_PMD:
@@ -530,37 +534,50 @@ static __always_inline unsigned long get_extent(enum pgt_entry entry,
 }
 /*
+ * Should move_pgt_entry() acquire the rmap locks? This is either expressed in
+ * the PMC, or overridden in the case of normal, larger page tables.
+ */
+static bool should_take_rmap_locks(struct pagetable_move_control *pmc,
+                                   enum pgt_entry entry)
+{
+        switch (entry) {
+        case NORMAL_PMD:
+        case NORMAL_PUD:
+                return true;
+        default:
+                return pmc->need_rmap_locks;
+        }
+}
+
+/*
  * Attempts to speedup the move by moving entry at the level corresponding to
  * pgt_entry. Returns true if the move was successful, else false.
  */
-static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma,
-                           unsigned long old_addr, unsigned long new_addr,
-                           void *old_entry, void *new_entry, bool need_rmap_locks)
+static bool move_pgt_entry(struct pagetable_move_control *pmc,
+                           enum pgt_entry entry, void *old_entry, void *new_entry)
 {
         bool moved = false;
+        bool need_rmap_locks = should_take_rmap_locks(pmc, entry);
         /* See comment in move_ptes() */
         if (need_rmap_locks)
-                take_rmap_locks(vma);
+                take_rmap_locks(pmc->old);
         switch (entry) {
         case NORMAL_PMD:
-                moved = move_normal_pmd(vma, old_addr, new_addr, old_entry,
-                                        new_entry);
+                moved = move_normal_pmd(pmc, old_entry, new_entry);
                 break;
         case NORMAL_PUD:
-                moved = move_normal_pud(vma, old_addr, new_addr, old_entry,
-                                        new_entry);
+                moved = move_normal_pud(pmc, old_entry, new_entry);
                 break;
         case HPAGE_PMD:
                 moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
-                        move_huge_pmd(vma, old_addr, new_addr, old_entry,
+                        move_huge_pmd(pmc->old, pmc->old_addr, pmc->new_addr, old_entry,
                                       new_entry);
                 break;
         case HPAGE_PUD:
                 moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
-                        move_huge_pud(vma, old_addr, new_addr, old_entry,
-                                      new_entry);
+                        move_huge_pud(pmc, old_entry, new_entry);
                 break;
         default:
@@ -569,7 +586,7 @@ static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma,
         }
         if (need_rmap_locks)
-                drop_rmap_locks(vma);
+                drop_rmap_locks(pmc->old);
         return moved;
 }
@@ -705,108 +722,121 @@ static void try_realign_addr(struct pagetable_move_control *pmc,
         pmc->new_addr &= pagetable_mask;
 }
+/* Is the page table move operation done? */
+static bool pmc_done(struct pagetable_move_control *pmc)
+{
+        return pmc->old_addr >= pmc->old_end;
+}
+
+/* Advance to the next page table, offset by extent bytes. */
+static void pmc_next(struct pagetable_move_control *pmc, unsigned long extent)
+{
+        pmc->old_addr += extent;
+        pmc->new_addr += extent;
+}
+
+/*
+ * Determine how many bytes in the specified input range have had their page
+ * tables moved so far.
+ */
+static unsigned long pmc_progress(struct pagetable_move_control *pmc)
+{
+        unsigned long orig_old_addr = pmc->old_end - pmc->len_in;
+        unsigned long old_addr = pmc->old_addr;
+
+        /*
+         * Prevent negative return values when {old,new}_addr was realigned but
+         * we broke out of the loop in move_page_tables() for the first PMD
+         * itself.
+         */
+        return old_addr < orig_old_addr ? 0 : old_addr - orig_old_addr;
+}
+
 unsigned long move_page_tables(struct pagetable_move_control *pmc)
 {
-        unsigned long extent, old_end;
+        unsigned long extent;
         struct mmu_notifier_range range;
         pmd_t *old_pmd, *new_pmd;
         pud_t *old_pud, *new_pud;
-        unsigned long old_addr, new_addr;
-        struct vm_area_struct *vma = pmc->old;
+        struct mm_struct *mm = pmc->old->vm_mm;
         if (!pmc->len_in)
                 return 0;
-        if (is_vm_hugetlb_page(vma))
+        if (is_vm_hugetlb_page(pmc->old))
                 return move_hugetlb_page_tables(pmc->old, pmc->new, pmc->old_addr,
                                                 pmc->new_addr, pmc->len_in);
-        old_end = pmc->old_end;
         /*
          * If possible, realign addresses to PMD boundary for faster copy.
          * Only realign if the mremap copying hits a PMD boundary.
          */
         try_realign_addr(pmc, PMD_MASK);
-        /* These may have been changed. */
-        old_addr = pmc->old_addr;
-        new_addr = pmc->new_addr;
-        flush_cache_range(vma, old_addr, old_end);
-        mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm,
-                                old_addr, old_end);
+        flush_cache_range(pmc->old, pmc->old_addr, pmc->old_end);
+        mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, mm,
+                                pmc->old_addr, pmc->old_end);
         mmu_notifier_invalidate_range_start(&range);
-        for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
+        for (; !pmc_done(pmc); pmc_next(pmc, extent)) {
                 cond_resched();
                 /*
                  * If extent is PUD-sized try to speed up the move by moving at the
                  * PUD level if possible.
                  */
-                extent = get_extent(NORMAL_PUD, old_addr, old_end, new_addr);
+                extent = get_extent(NORMAL_PUD, pmc);
 
-                old_pud = get_old_pud(vma->vm_mm, old_addr);
+                old_pud = get_old_pud(mm, pmc->old_addr);
                 if (!old_pud)
                         continue;
-                new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
+                new_pud = alloc_new_pud(mm, pmc->new_addr);
                 if (!new_pud)
                         break;
                 if (pud_trans_huge(*old_pud) || pud_devmap(*old_pud)) {
                         if (extent == HPAGE_PUD_SIZE) {
-                                move_pgt_entry(HPAGE_PUD, vma, old_addr, new_addr,
-                                               old_pud, new_pud, pmc->need_rmap_locks);
+                                move_pgt_entry(pmc, HPAGE_PUD, old_pud, new_pud);
                                 /* We ignore and continue on error? */
                                 continue;
                         }
                 } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
-                        if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr,
-                                           old_pud, new_pud, true))
+                        if (move_pgt_entry(pmc, NORMAL_PUD, old_pud, new_pud))
                                 continue;
                 }
-                extent = get_extent(NORMAL_PMD, old_addr, old_end, new_addr);
-                old_pmd = get_old_pmd(vma->vm_mm, old_addr);
+                extent = get_extent(NORMAL_PMD, pmc);
+                old_pmd = get_old_pmd(mm, pmc->old_addr);
                 if (!old_pmd)
                         continue;
-                new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
+                new_pmd = alloc_new_pmd(mm, pmc->new_addr);
                 if (!new_pmd)
                         break;
 again:
                 if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd) ||
                     pmd_devmap(*old_pmd)) {
                         if (extent == HPAGE_PMD_SIZE &&
-                            move_pgt_entry(HPAGE_PMD, vma, old_addr, new_addr,
-                                           old_pmd, new_pmd, pmc->need_rmap_locks))
+                            move_pgt_entry(pmc, HPAGE_PMD, old_pmd, new_pmd))
                                 continue;
-                        split_huge_pmd(vma, old_pmd, old_addr);
+                        split_huge_pmd(pmc->old, old_pmd, pmc->old_addr);
                 } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) &&
                            extent == PMD_SIZE) {
                         /*
                          * If the extent is PMD-sized, try to speed the move by
                          * moving at the PMD level if possible.
                          */
-                        if (move_pgt_entry(NORMAL_PMD, vma, old_addr, new_addr,
-                                           old_pmd, new_pmd, true))
+                        if (move_pgt_entry(pmc, NORMAL_PMD, old_pmd, new_pmd))
                                 continue;
                 }
                 if (pmd_none(*old_pmd))
                         continue;
                 if (pte_alloc(pmc->new->vm_mm, new_pmd))
                         break;
-                if (move_ptes(vma, old_pmd, old_addr, old_addr + extent,
-                              pmc->new, new_pmd, new_addr, pmc->need_rmap_locks) < 0)
+                if (move_ptes(pmc, extent, old_pmd, new_pmd) < 0)
                         goto again;
         }
         mmu_notifier_invalidate_range_end(&range);
-        /*
-         * Prevent negative return values when {old,new}_addr was realigned
-         * but we broke out of the above loop for the first PMD itself.
-         */
-        if (old_addr < old_end - pmc->len_in)
-                return 0;
-
-        return pmc->len_in + old_addr - old_end;        /* how much done */
+        return pmc_progress(pmc);
 }
 /* Set vrm->delta to the difference in VMA size specified by user. */
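The struct pagetable_move_control ("PMC") that these helpers now receive is defined elsewhere in the series and is not part of this hunk. As a rough orientation aid only, the sketch below is inferred from the fields this diff dereferences (old, new, old_addr, new_addr, old_end, len_in, need_rmap_locks); the real definition may name or group members differently and carry additional state.

/*
 * Sketch of the move-control state inferred from the accesses in this diff;
 * not the authoritative definition.
 */
struct pagetable_move_control {
        struct vm_area_struct *old;     /* Source VMA. */
        struct vm_area_struct *new;     /* Destination VMA. */

        unsigned long old_addr;         /* Current source address, advanced by pmc_next(). */
        unsigned long old_end;          /* Exclusive end of the source range. */
        unsigned long new_addr;         /* Current destination address. */

        unsigned long len_in;           /* Length requested by the caller, before realignment. */
        bool need_rmap_locks;           /* Whether moves must run under the rmap locks. */
};

Centralising this state in one object is what lets the new pmc_done()/pmc_next()/pmc_progress() helpers replace the open-coded old_addr/new_addr/old_end bookkeeping in move_page_tables().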