diff options
| author | Lance Yang <lance.yang@linux.dev> | 2025-09-30 16:10:40 +0800 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2025-10-07 14:01:11 -0700 |
| commit | 9658d698a8a83540bf6a6c80d13c9a61590ee985 (patch) | |
| tree | f9745c0325706ffbba0f604aa50648898e2971f7 | |
| parent | 1ce6473d17e78e3cb9a40147658231731a551828 (diff) | |
mm/rmap: fix soft-dirty and uffd-wp bit loss when remapping zero-filled mTHP subpage to shared zeropage
When splitting an mTHP and replacing a zero-filled subpage with the shared
zeropage, try_to_map_unused_to_zeropage() currently drops several
important PTE bits.
For userspace tools like CRIU, which rely on the soft-dirty mechanism for
incremental snapshots, losing the soft-dirty bit means modified pages are
missed, leading to inconsistent memory state after restore.
As pointed out by David, the more critical uffd-wp bit is also dropped.
This breaks the userfaultfd write-protection mechanism, causing writes to
be silently missed by monitoring applications, which can lead to data
corruption.
Preserve both the soft-dirty and uffd-wp bits from the old PTE when
creating the new zeropage mapping to ensure they are correctly tracked.
Link: https://lkml.kernel.org/r/20250930081040.80926-1-lance.yang@linux.dev
Fixes: b1f202060afe ("mm: remap unused subpages to shared zeropage when splitting isolated thp")
Signed-off-by: Lance Yang <lance.yang@linux.dev>
Suggested-by: David Hildenbrand <david@redhat.com>
Suggested-by: Dev Jain <dev.jain@arm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Dev Jain <dev.jain@arm.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Jann Horn <jannh@google.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mariano Pache <npache@redhat.com>
Cc: Mathew Brost <matthew.brost@intel.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yu Zhao <yuzhao@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
| -rw-r--r-- | mm/migrate.c | 15 |
1 files changed, 10 insertions, 5 deletions
diff --git a/mm/migrate.c b/mm/migrate.c index ce83c2c3c287..e3065c9edb55 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -296,8 +296,7 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list) } static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw, - struct folio *folio, - unsigned long idx) + struct folio *folio, pte_t old_pte, unsigned long idx) { struct page *page = folio_page(folio, idx); pte_t newpte; @@ -306,7 +305,7 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw, return false; VM_BUG_ON_PAGE(!PageAnon(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE(pte_present(ptep_get(pvmw->pte)), page); + VM_BUG_ON_PAGE(pte_present(old_pte), page); if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) || mm_forbids_zeropage(pvmw->vma->vm_mm)) @@ -322,6 +321,12 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw, newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address), pvmw->vma->vm_page_prot)); + + if (pte_swp_soft_dirty(old_pte)) + newpte = pte_mksoft_dirty(newpte); + if (pte_swp_uffd_wp(old_pte)) + newpte = pte_mkuffd_wp(newpte); + set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte); dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio)); @@ -364,13 +369,13 @@ static bool remove_migration_pte(struct folio *folio, continue; } #endif + old_pte = ptep_get(pvmw.pte); if (rmap_walk_arg->map_unused_to_zeropage && - try_to_map_unused_to_zeropage(&pvmw, folio, idx)) + try_to_map_unused_to_zeropage(&pvmw, folio, old_pte, idx)) continue; folio_get(folio); pte = mk_pte(new, READ_ONCE(vma->vm_page_prot)); - old_pte = ptep_get(pvmw.pte); entry = pte_to_swp_entry(old_pte); if (!is_migration_entry_young(entry)) |
