Diffstat (limited to 'mm/ksm.c')
 mm/ksm.c | 159
 1 file changed, 100 insertions(+), 59 deletions(-)
diff --git a/mm/ksm.c b/mm/ksm.c
index c4e730409949..cfc182255c7b 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -389,7 +389,7 @@ static unsigned long ewma(unsigned long prev, unsigned long curr)
* exponentially weighted moving average. The new pages_to_scan value is
* multiplied with that change factor:
*
- * new_pages_to_scan *= change facor
+ * new_pages_to_scan *= change factor
*
* The new_pages_to_scan value is limited by the cpu min and max values. It
* calculates the cpu percent for the last scan and calculates the new
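To make the advisor arithmetic above concrete, here is a minimal userspace C sketch of an EWMA-smoothed change factor applied to pages_to_scan and then clamped to a range. The weight, limits, sample values and helper names are illustrative assumptions, not the kernel's actual tunables.

#include <stdio.h>

/* Illustrative constants only -- the in-kernel advisor has its own tunables. */
#define EWMA_WEIGHT	30	/* percent given to the newest sample */
#define PAGES_MIN	100
#define PAGES_MAX	30000

/* Exponentially weighted moving average, as described in the comment above. */
static unsigned long ewma(unsigned long prev, unsigned long curr)
{
	return ((100 - EWMA_WEIGHT) * prev + EWMA_WEIGHT * curr) / 100;
}

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	/* Hypothetical scan times (ms): target 1000 ms, last scan took 1500 ms. */
	unsigned long target_ms = 1000, last_ms = 1500;
	unsigned long pages_to_scan = 5000;
	unsigned long smoothed = 100;	/* previous change factor, in percent */

	/* Scanning was too slow, so the change factor drops below 100%. */
	unsigned long change = target_ms * 100 / last_ms;	/* 66% */

	smoothed = ewma(smoothed, change);			/* smooth the factor */
	pages_to_scan = pages_to_scan * smoothed / 100;		/* new_pages_to_scan *= change factor */
	pages_to_scan = clamp_ul(pages_to_scan, PAGES_MIN, PAGES_MAX);

	printf("smoothed change %lu%%, new pages_to_scan %lu\n",
	       smoothed, pages_to_scan);
	return 0;
}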
@@ -607,7 +607,76 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
return atomic_read(&mm->mm_users) == 0;
}
+static int break_ksm_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ unsigned long *found_addr = (unsigned long *) walk->private;
+ struct mm_struct *mm = walk->mm;
+ pte_t *start_ptep, *ptep;
+ spinlock_t *ptl;
+ int found = 0;
+
+ if (ksm_test_exit(walk->mm))
+ return 0;
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+
+ start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+ if (!start_ptep)
+ return 0;
+
+ for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) {
+ pte_t pte = ptep_get(ptep);
+ struct folio *folio = NULL;
+
+ if (pte_present(pte)) {
+ folio = vm_normal_folio(walk->vma, addr, pte);
+ } else if (!pte_none(pte)) {
+ const softleaf_t entry = softleaf_from_pte(pte);
+
+ /*
+ * As KSM pages remain KSM pages until freed, no need to wait
+ * here for migration to end.
+ */
+ if (softleaf_is_migration(entry))
+ folio = softleaf_to_folio(entry);
+ }
+ /* return 1 if the page is a normal ksm page or a KSM-placed zero page */
+ found = (folio && folio_test_ksm(folio)) ||
+ (pte_present(pte) && is_ksm_zero_pte(pte));
+ if (found) {
+ *found_addr = addr;
+ goto out_unlock;
+ }
+ }
+out_unlock:
+ pte_unmap_unlock(ptep, ptl);
+ return found;
+}
+
+static const struct mm_walk_ops break_ksm_ops = {
+ .pmd_entry = break_ksm_pmd_entry,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops break_ksm_lock_vma_ops = {
+ .pmd_entry = break_ksm_pmd_entry,
+ .walk_lock = PGWALK_WRLOCK,
+};
+
/*
+ * Though it's very tempting to unmerge rmap_items from stable tree rather
+ * than check every pte of a given vma, the locking doesn't quite work for
+ * that - an rmap_item is assigned to the stable tree after inserting ksm
+ * page and upping mmap_lock. Nor does it fit with the way we skip dup'ing
+ * rmap_items from parent to child at fork time (so as not to waste time
+ * if exit comes before the next scan reaches it).
+ *
+ * Similarly, although we'd like to remove rmap_items (so updating counts
+ * and freeing memory) when unmerging an area, it's easier to leave that
+ * to the next pass of ksmd - consider, for example, how ksmd might be
+ * in cmp_and_merge_page on one of the rmap_items we would be removing.
+ *
* We use break_ksm to break COW on a ksm page by triggering unsharing,
* such that the ksm page will get replaced by an exclusive anonymous page.
*
@@ -620,31 +689,20 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
* of the process that owns 'vma'. We also do not want to enforce
* protection keys here anyway.
*/
-static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long end, bool lock_vma)
{
vm_fault_t ret = 0;
-
- if (lock_vma)
- vma_start_write(vma);
+ const struct mm_walk_ops *ops = lock_vma ?
+ &break_ksm_lock_vma_ops : &break_ksm_ops;
do {
- bool ksm_page = false;
- struct folio_walk fw;
- struct folio *folio;
+ int ksm_page;
cond_resched();
- folio = folio_walk_start(&fw, vma, addr,
- FW_MIGRATION | FW_ZEROPAGE);
- if (folio) {
- /* Small folio implies FW_LEVEL_PTE. */
- if (!folio_test_large(folio) &&
- (folio_test_ksm(folio) || is_ksm_zero_pte(fw.pte)))
- ksm_page = true;
- folio_walk_end(&fw, vma);
- }
-
- if (!ksm_page)
- return 0;
+ ksm_page = walk_page_range_vma(vma, addr, end, ops, &addr);
+ if (ksm_page <= 0)
+ return ksm_page;
ret = handle_mm_fault(vma, addr,
FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
NULL);
@@ -730,7 +788,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
mmap_read_lock(mm);
vma = find_mergeable_vma(mm, addr);
if (vma)
- break_ksm(vma, addr, false);
+ break_ksm(vma, addr, addr + PAGE_SIZE, false);
mmap_read_unlock(mm);
}
@@ -1025,36 +1083,6 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list)
}
}
-/*
- * Though it's very tempting to unmerge rmap_items from stable tree rather
- * than check every pte of a given vma, the locking doesn't quite work for
- * that - an rmap_item is assigned to the stable tree after inserting ksm
- * page and upping mmap_lock. Nor does it fit with the way we skip dup'ing
- * rmap_items from parent to child at fork time (so as not to waste time
- * if exit comes before the next scan reaches it).
- *
- * Similarly, although we'd like to remove rmap_items (so updating counts
- * and freeing memory) when unmerging an area, it's easier to leave that
- * to the next pass of ksmd - consider, for example, how ksmd might be
- * in cmp_and_merge_page on one of the rmap_items we would be removing.
- */
-static int unmerge_ksm_pages(struct vm_area_struct *vma,
- unsigned long start, unsigned long end, bool lock_vma)
-{
- unsigned long addr;
- int err = 0;
-
- for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
- if (ksm_test_exit(vma->vm_mm))
- break;
- if (signal_pending(current))
- err = -ERESTARTSYS;
- else
- err = break_ksm(vma, addr, lock_vma);
- }
- return err;
-}
-
static inline
struct ksm_stable_node *folio_stable_node(const struct folio *folio)
{
@@ -1192,8 +1220,7 @@ static int unmerge_and_remove_all_rmap_items(void)
for_each_vma(vmi, vma) {
if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
continue;
- err = unmerge_ksm_pages(vma,
- vma->vm_start, vma->vm_end, false);
+ err = break_ksm(vma, vma->vm_start, vma->vm_end, false);
if (err)
goto error;
}
@@ -2712,8 +2739,14 @@ no_vmas:
spin_unlock(&ksm_mmlist_lock);
mm_slot_free(mm_slot_cache, mm_slot);
+ /*
+ * Only clear MMF_VM_MERGEABLE. We must not clear
+ * MMF_VM_MERGE_ANY: an MMF_VM_MERGE_ANY process may have just had
+ * its mm_struct added to the ksm_mm_slot list before the process
+ * has started running, or before it has performed mmap/brk to
+ * create any anonymous VMAs.
+ */
mm_flags_clear(MMF_VM_MERGEABLE, mm);
- mm_flags_clear(MMF_VM_MERGE_ANY, mm);
mmap_read_unlock(mm);
mmdrop(mm);
} else {
@@ -2814,7 +2847,7 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
return 0;
if (vma->anon_vma) {
- err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end, true);
+ err = break_ksm(vma, vma->vm_start, vma->vm_end, true);
if (err)
return err;
}
@@ -2831,12 +2864,20 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
*
* Returns: @vm_flags possibly updated to mark mergeable.
*/
-vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
+vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
vm_flags_t vm_flags)
{
if (mm_flags_test(MMF_VM_MERGE_ANY, mm) &&
- __ksm_should_add_vma(file, vm_flags))
+ __ksm_should_add_vma(file, vm_flags)) {
vm_flags |= VM_MERGEABLE;
+ /*
+ * MMF_VM_MERGEABLE is normally already set here. In rare cases,
+ * however, ksmd may have cleared it after scanning a full cycle
+ * without finding any mergeable VMA.
+ */
+ if (unlikely(!mm_flags_test(MMF_VM_MERGEABLE, mm)))
+ __ksm_enter(mm);
+ }
return vm_flags;
}
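The MMF_VM_MERGE_ANY handling above corresponds to the process-wide KSM opt-in from userspace. A minimal sketch, assuming a kernel with CONFIG_KSM and prctl PR_SET_MEMORY_MERGE support (v6.4+), that opts the calling process in and maps anonymous memory, which then picks up VM_MERGEABLE via ksm_vma_flags():

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <unistd.h>

#ifndef PR_SET_MEMORY_MERGE
#define PR_SET_MEMORY_MERGE 67	/* from <linux/prctl.h> on older headers */
#endif

int main(void)
{
	/* Opt the whole process into KSM; sets MMF_VM_MERGE_ANY in the kernel. */
	if (prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0)) {
		perror("prctl(PR_SET_MEMORY_MERGE)");
		return 1;
	}

	/* New anonymous mappings now get VM_MERGEABLE via ksm_vma_flags(). */
	size_t len = 16UL << 20;
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Identical page contents give ksmd something to merge eventually. */
	memset(buf, 0x5a, len);
	pause();	/* keep the mapping alive while ksmd scans */
	return 0;
}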
@@ -2958,7 +2999,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
return 0; /* just ignore the advice */
if (vma->anon_vma) {
- err = unmerge_ksm_pages(vma, start, end, true);
+ err = break_ksm(vma, start, end, true);
if (err)
return err;
}
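ksm_madvise() above is reached via madvise(2). A minimal userspace sketch that marks a region mergeable and later unmerges it again, which is the path that now drives break_ksm() over the whole [start, end) range; region size, fill pattern and the sleep are arbitrary:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = 8UL << 20;
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(buf, 0x5a, len);	/* identical pages: candidates for merging */

	/* Ask KSM to consider this range; sets VM_MERGEABLE on the VMA. */
	if (madvise(buf, len, MADV_MERGEABLE)) {
		perror("madvise(MADV_MERGEABLE)");
		return 1;
	}

	sleep(10);	/* give ksmd a chance to merge (timing is illustrative) */

	/*
	 * Unmerge: in the kernel this ends up in ksm_madvise(), which now
	 * calls break_ksm(vma, start, end, true) to unshare any KSM pages.
	 */
	if (madvise(buf, len, MADV_UNMERGEABLE)) {
		perror("madvise(MADV_UNMERGEABLE)");
		return 1;
	}

	munmap(buf, len);
	return 0;
}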
@@ -3340,7 +3381,7 @@ static int ksm_memory_callback(struct notifier_block *self,
* Prevent ksm_do_scan(), unmerge_and_remove_all_rmap_items()
* and remove_all_stable_nodes() while memory is going offline:
* it is unsafe for them to touch the stable tree at this time.
- * But unmerge_ksm_pages(), rmap lookups and other entry points
+ * But break_ksm(), rmap lookups and other entry points
* which do not need the ksm_thread_mutex are all safe.
*/
mutex_lock(&ksm_thread_mutex);