From 8f34f1eac3820fc2722e5159acceb22545b30b0d Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Jun 2021 18:49:02 -0700 Subject: mm/userfaultfd: fix uffd-wp special cases for fork() We tried to do something similar in b569a1760782 ("userfaultfd: wp: drop _PAGE_UFFD_WP properly when fork") previously, but it's not doing it all right.. A few fixes around the code path: 1. We were referencing VM_UFFD_WP vm_flags on the _old_ vma rather than the new vma. That's overlooked in b569a1760782, so it won't work as expected. Thanks to the recent rework on fork code (7a4830c380f3a8b3), we can easily get the new vma now, so switch the checks to that. 2. Dropping the uffd-wp bit in copy_huge_pmd() could be wrong if the huge pmd is a migration huge pmd. When it happens, instead of using pmd_uffd_wp(), we should use pmd_swp_uffd_wp(). The fix is simply to handle them separately. 3. Forget to carry over uffd-wp bit for a write migration huge pmd entry. This also happens in copy_huge_pmd(), where we converted a write huge migration entry into a read one. 4. In copy_nonpresent_pte(), drop uffd-wp if necessary for swap ptes. 5. In copy_present_page() when COW is enforced when fork(), we also need to pass over the uffd-wp bit if VM_UFFD_WP is armed on the new vma, and when the pte to be copied has uffd-wp bit set. Remove the comment in copy_present_pte() about this. It won't help a huge lot to only comment there, but comment everywhere would be an overkill. Let's assume the commit messages would help. [peterx@redhat.com: fix a few thp pmd missing uffd-wp bit] Link: https://lkml.kernel.org/r/20210428225030.9708-4-peterx@redhat.com Link: https://lkml.kernel.org/r/20210428225030.9708-3-peterx@redhat.com Fixes: b569a1760782f ("userfaultfd: wp: drop _PAGE_UFFD_WP properly when fork") Signed-off-by: Peter Xu Cc: Jerome Glisse Cc: Mike Rapoport Cc: Alexander Viro Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: Brian Geffon Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Joe Perches Cc: Kirill A. Shutemov Cc: Lokesh Gidra Cc: Mike Kravetz Cc: Mina Almasry Cc: Oliver Upton Cc: Shaohua Li Cc: Shuah Khan Cc: Stephen Rothwell Cc: Wang Qing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 48c4576df898..07a71a016c18 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -707,10 +707,10 @@ out: static unsigned long copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, - unsigned long addr, int *rss) + pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma, unsigned long addr, int *rss) { - unsigned long vm_flags = vma->vm_flags; + unsigned long vm_flags = dst_vma->vm_flags; pte_t pte = *src_pte; struct page *page; swp_entry_t entry = pte_to_swp_entry(pte); @@ -779,6 +779,8 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, set_pte_at(src_mm, addr, src_pte, pte); } } + if (!userfaultfd_wp(dst_vma)) + pte = pte_swp_clear_uffd_wp(pte); set_pte_at(dst_mm, addr, dst_pte, pte); return 0; } @@ -844,6 +846,9 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma /* All done, just insert the new page copy in the child */ pte = mk_pte(new_page, dst_vma->vm_page_prot); pte = maybe_mkwrite(pte_mkdirty(pte), dst_vma); + if (userfaultfd_pte_wp(dst_vma, *src_pte)) + /* Uffd-wp needs to be delivered to dest pte as well */ + pte = pte_wrprotect(pte_mkuffd_wp(pte)); set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); return 0; } @@ -893,12 +898,7 @@ copy_present_pte(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, pte = pte_mkclean(pte); pte = pte_mkold(pte); - /* - * Make sure the _PAGE_UFFD_WP bit is cleared if the new VMA - * does not have the VM_UFFD_WP, which means that the uffd - * fork event is not enabled. - */ - if (!(vm_flags & VM_UFFD_WP)) + if (!userfaultfd_wp(dst_vma)) pte = pte_clear_uffd_wp(pte); set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); @@ -973,7 +973,8 @@ again: if (unlikely(!pte_present(*src_pte))) { entry.val = copy_nonpresent_pte(dst_mm, src_mm, dst_pte, src_pte, - src_vma, addr, rss); + dst_vma, src_vma, + addr, rss); if (entry.val) break; progress += 8; @@ -1050,8 +1051,8 @@ copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, || pmd_devmap(*src_pmd)) { int err; VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma); - err = copy_huge_pmd(dst_mm, src_mm, - dst_pmd, src_pmd, addr, src_vma); + err = copy_huge_pmd(dst_mm, src_mm, dst_pmd, src_pmd, + addr, dst_vma, src_vma); if (err == -ENOMEM) return -ENOMEM; if (!err) -- cgit