Diffstat (limited to 'mm/ksm.c')
-rw-r--r--	mm/ksm.c	159
1 file changed, 100 insertions(+), 59 deletions(-)
@@ -389,7 +389,7 @@ static unsigned long ewma(unsigned long prev, unsigned long curr)
  * exponentially weighted moving average. The new pages_to_scan value is
  * multiplied with that change factor:
  *
- *	new_pages_to_scan *= change facor
+ *	new_pages_to_scan *= change factor
  *
  * The new_pages_to_scan value is limited by the cpu min and max values. It
  * calculates the cpu percent for the last scan and calculates the new
@@ -607,7 +607,76 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
 	return atomic_read(&mm->mm_users) == 0;
 }
 
+static int break_ksm_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end,
+			       struct mm_walk *walk)
+{
+	unsigned long *found_addr = (unsigned long *) walk->private;
+	struct mm_struct *mm = walk->mm;
+	pte_t *start_ptep, *ptep;
+	spinlock_t *ptl;
+	int found = 0;
+
+	if (ksm_test_exit(walk->mm))
+		return 0;
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+
+	start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+	if (!start_ptep)
+		return 0;
+
+	for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) {
+		pte_t pte = ptep_get(ptep);
+		struct folio *folio = NULL;
+
+		if (pte_present(pte)) {
+			folio = vm_normal_folio(walk->vma, addr, pte);
+		} else if (!pte_none(pte)) {
+			const softleaf_t entry = softleaf_from_pte(pte);
+
+			/*
+			 * As KSM pages remain KSM pages until freed, no need to wait
+			 * here for migration to end.
+			 */
+			if (softleaf_is_migration(entry))
+				folio = softleaf_to_folio(entry);
+		}
+		/* return 1 if the page is a normal ksm page or KSM-placed zero page */
+		found = (folio && folio_test_ksm(folio)) ||
+			(pte_present(pte) && is_ksm_zero_pte(pte));
+		if (found) {
+			*found_addr = addr;
+			goto out_unlock;
+		}
+	}
+out_unlock:
+	pte_unmap_unlock(ptep, ptl);
+	return found;
+}
+
+static const struct mm_walk_ops break_ksm_ops = {
+	.pmd_entry = break_ksm_pmd_entry,
+	.walk_lock = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops break_ksm_lock_vma_ops = {
+	.pmd_entry = break_ksm_pmd_entry,
+	.walk_lock = PGWALK_WRLOCK,
+};
+
 /*
+ * Though it's very tempting to unmerge rmap_items from stable tree rather
+ * than check every pte of a given vma, the locking doesn't quite work for
+ * that - an rmap_item is assigned to the stable tree after inserting ksm
+ * page and upping mmap_lock.  Nor does it fit with the way we skip dup'ing
+ * rmap_items from parent to child at fork time (so as not to waste time
+ * if exit comes before the next scan reaches it).
+ *
+ * Similarly, although we'd like to remove rmap_items (so updating counts
+ * and freeing memory) when unmerging an area, it's easier to leave that
+ * to the next pass of ksmd - consider, for example, how ksmd might be
+ * in cmp_and_merge_page on one of the rmap_items we would be removing.
+ *
  * We use break_ksm to break COW on a ksm page by triggering unsharing,
  * such that the ksm page will get replaced by an exclusive anonymous page.
  *
@@ -620,31 +689,20 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
  * of the process that owns 'vma'.  We also do not want to enforce
  * protection keys here anyway.
  */
-static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr,
+		     unsigned long end, bool lock_vma)
 {
 	vm_fault_t ret = 0;
-
-	if (lock_vma)
-		vma_start_write(vma);
+	const struct mm_walk_ops *ops = lock_vma ?
+				&break_ksm_lock_vma_ops : &break_ksm_ops;
 
 	do {
-		bool ksm_page = false;
-		struct folio_walk fw;
-		struct folio *folio;
+		int ksm_page;
 
 		cond_resched();
-		folio = folio_walk_start(&fw, vma, addr,
-					 FW_MIGRATION | FW_ZEROPAGE);
-		if (folio) {
-			/* Small folio implies FW_LEVEL_PTE. */
-			if (!folio_test_large(folio) &&
-			    (folio_test_ksm(folio) || is_ksm_zero_pte(fw.pte)))
-				ksm_page = true;
-			folio_walk_end(&fw, vma);
-		}
-
-		if (!ksm_page)
-			return 0;
+		ksm_page = walk_page_range_vma(vma, addr, end, ops, &addr);
+		if (ksm_page <= 0)
+			return ksm_page;
 		ret = handle_mm_fault(vma, addr,
 				      FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
 				      NULL);
@@ -730,7 +788,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
 	mmap_read_lock(mm);
 	vma = find_mergeable_vma(mm, addr);
 	if (vma)
-		break_ksm(vma, addr, false);
+		break_ksm(vma, addr, addr + PAGE_SIZE, false);
 	mmap_read_unlock(mm);
 }
@@ -1025,36 +1083,6 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list)
 	}
 }
 
-/*
- * Though it's very tempting to unmerge rmap_items from stable tree rather
- * than check every pte of a given vma, the locking doesn't quite work for
- * that - an rmap_item is assigned to the stable tree after inserting ksm
- * page and upping mmap_lock.  Nor does it fit with the way we skip dup'ing
- * rmap_items from parent to child at fork time (so as not to waste time
- * if exit comes before the next scan reaches it).
- *
- * Similarly, although we'd like to remove rmap_items (so updating counts
- * and freeing memory) when unmerging an area, it's easier to leave that
- * to the next pass of ksmd - consider, for example, how ksmd might be
- * in cmp_and_merge_page on one of the rmap_items we would be removing.
- */
-static int unmerge_ksm_pages(struct vm_area_struct *vma,
-			unsigned long start, unsigned long end, bool lock_vma)
-{
-	unsigned long addr;
-	int err = 0;
-
-	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
-		if (ksm_test_exit(vma->vm_mm))
-			break;
-		if (signal_pending(current))
-			err = -ERESTARTSYS;
-		else
-			err = break_ksm(vma, addr, lock_vma);
-	}
-	return err;
-}
-
 static inline struct ksm_stable_node *folio_stable_node(const struct folio *folio)
 {
@@ -1192,8 +1220,7 @@ static int unmerge_and_remove_all_rmap_items(void)
 		for_each_vma(vmi, vma) {
 			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
 				continue;
-			err = unmerge_ksm_pages(vma,
-					vma->vm_start, vma->vm_end, false);
+			err = break_ksm(vma, vma->vm_start, vma->vm_end, false);
 			if (err)
 				goto error;
 		}
@@ -2712,8 +2739,14 @@ no_vmas:
 		spin_unlock(&ksm_mmlist_lock);
 
 		mm_slot_free(mm_slot_cache, mm_slot);
+		/*
+		 * Only clear MMF_VM_MERGEABLE. We must not clear
+		 * MMF_VM_MERGE_ANY: an MMF_VM_MERGE_ANY process may have only
+		 * just added its mm_struct to the ksm_mm_slot list, and may
+		 * not yet have started running or performed the mmap/brk
+		 * calls that allocate anonymous VMAs.
+		 */
 		mm_flags_clear(MMF_VM_MERGEABLE, mm);
-		mm_flags_clear(MMF_VM_MERGE_ANY, mm);
 		mmap_read_unlock(mm);
 		mmdrop(mm);
 	} else {
@@ -2814,7 +2847,7 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
 		return 0;
 
 	if (vma->anon_vma) {
-		err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end, true);
+		err = break_ksm(vma, vma->vm_start, vma->vm_end, true);
 		if (err)
 			return err;
 	}
@@ -2831,12 +2864,20 @@
  *
  * Returns: @vm_flags possibly updated to mark mergeable.
  */
-vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
+vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
 			 vm_flags_t vm_flags)
 {
 	if (mm_flags_test(MMF_VM_MERGE_ANY, mm) &&
-	    __ksm_should_add_vma(file, vm_flags))
+	    __ksm_should_add_vma(file, vm_flags)) {
 		vm_flags |= VM_MERGEABLE;
+		/*
+		 * Generally, the flags here always include MMF_VM_MERGEABLE.
+		 * In rare cases, however, the flag may have been cleared by
+		 * ksmd after it scanned a full cycle without finding any
+		 * mergeable VMA.
+		 */
+		if (unlikely(!mm_flags_test(MMF_VM_MERGEABLE, mm)))
+			__ksm_enter(mm);
+	}
 	return vm_flags;
 }
@@ -2958,7 +2999,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		return 0;		/* just ignore the advice */
 
 	if (vma->anon_vma) {
-		err = unmerge_ksm_pages(vma, start, end, true);
+		err = break_ksm(vma, start, end, true);
 		if (err)
 			return err;
 	}
@@ -3340,7 +3381,7 @@ static int ksm_memory_callback(struct notifier_block *self,
 	 * Prevent ksm_do_scan(), unmerge_and_remove_all_rmap_items()
	 * and remove_all_stable_nodes() while memory is going offline:
 	 * it is unsafe for them to touch the stable tree at this time.
-	 * But unmerge_ksm_pages(), rmap lookups and other entry points
+	 * But break_ksm(), rmap lookups and other entry points
 	 * which do not need the ksm_thread_mutex are all safe.
 	 */
 	mutex_lock(&ksm_thread_mutex);
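
A note on the first hunk: the comment it corrects describes the scan-time advisor's arithmetic. An exponentially weighted moving average smooths the observed change factor, pages_to_scan is scaled by the smoothed factor, and the result is clamped to the CPU min/max bounds. The standalone userspace sketch below renders only that arithmetic; the constants, the 30% EWMA weight, and the clamp bounds are invented for illustration and are not the kernel advisor's actual tunables.

#include <stdio.h>

/* Illustrative constants; the kernel's advisor uses its own tunables. */
#define EWMA_WEIGHT	30	/* percent weight given to the newest sample */
#define PAGES_MIN	100
#define PAGES_MAX	100000

/* Exponentially weighted moving average of two values given in percent. */
static unsigned long ewma(unsigned long prev, unsigned long curr)
{
	return ((100 - EWMA_WEIGHT) * prev + EWMA_WEIGHT * curr) / 100;
}

int main(void)
{
	unsigned long pages_to_scan = 5000;
	unsigned long smoothed = 100;			/* change factor, percent */
	unsigned long observed[] = { 150, 120, 80 };	/* made-up samples */

	for (unsigned int i = 0; i < sizeof(observed) / sizeof(observed[0]); i++) {
		smoothed = ewma(smoothed, observed[i]);
		/* new_pages_to_scan *= change factor (factor in percent) */
		pages_to_scan = pages_to_scan * smoothed / 100;
		if (pages_to_scan < PAGES_MIN)
			pages_to_scan = PAGES_MIN;
		if (pages_to_scan > PAGES_MAX)
			pages_to_scan = PAGES_MAX;
		printf("round %u: pages_to_scan = %lu\n", i, pages_to_scan);
	}
	return 0;
}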

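The heart of the change is the rewritten break_ksm(): instead of the old unmerge_ksm_pages() probing one address per call, the whole range is handed to walk_page_range_vma(), whose break_ksm_pmd_entry() callback returns 1 and reports the first KSM-mapped address through walk->private, 0 when the range is clean, or -ERESTARTSYS on a pending signal. break_ksm() then unshares the reported page with handle_mm_fault(FAULT_FLAG_UNSHARE) and resumes the walk from that address. The userspace sketch below mimics only that find-then-unshare protocol; every name in it is invented for illustration and none of it is kernel API.

#include <stdio.h>

#define NSLOTS 16

/* Toy stand-in for "this pte maps a KSM page". */
static int shared[NSLOTS] = { [3] = 1, [7] = 1, [12] = 1 };

/*
 * Analogue of walk_page_range_vma() driving break_ksm_pmd_entry():
 * scan [*pos, end) and report the first shared slot through *pos.
 * Returns 1 if one was found, 0 if the range is clean, <0 on error.
 */
static int walk_range(int *pos, int end)
{
	for (; *pos < end; (*pos)++)
		if (shared[*pos])
			return 1;
	return 0;
}

/* Analogue of break_ksm(): keep unsharing until the walk finds nothing. */
static int break_shared(int start, int end)
{
	int pos = start;
	int found;

	while ((found = walk_range(&pos, end)) > 0) {
		/* stand-in for handle_mm_fault(FAULT_FLAG_UNSHARE) */
		printf("unsharing slot %d\n", pos);
		shared[pos] = 0;
	}
	return found;	/* 0 on success, negative on error */
}

int main(void)
{
	return break_shared(0, NSLOTS) ? 1 : 0;
}

As in the kernel flow, the "walk" resumes from the address it last reported: the freshly unshared slot no longer matches, so the scan makes forward progress without rescanning the whole range from the start.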