From d1751118c88673fe5a948ad82277898e9e284c55 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 4 Jan 2023 17:52:07 -0500 Subject: mm/uffd: detect pgtable allocation failures Before this patch, when there's any pgtable allocation issues happened during change_protection(), the error will be ignored from the syscall. For shmem, there will be an error dumped into the host dmesg. Two issues with that: (1) Doing a trace dump when allocation fails is not anything close to grace. (2) The user should be notified with any kind of such error, so the user can trap it and decide what to do next, either by retrying, or stop the process properly, or anything else. For userfault users, this will change the API of UFFDIO_WRITEPROTECT when pgtable allocation failure happened. It should not normally break anyone, though. If it breaks, then in good ways. One man-page update will be on the way to introduce the new -ENOMEM for UFFDIO_WRITEPROTECT. Not marking stable so we keep the old behavior on the 5.19-till-now kernels. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20230104225207.1066932-4-peterx@redhat.com Signed-off-by: Peter Xu Reported-by: James Houghton Acked-by: James Houghton Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Mike Kravetz Cc: Muchun Song Cc: Nadav Amit Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'mm/userfaultfd.c') diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 65ad172add27..53c3d916ff66 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -710,11 +710,12 @@ ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start, mmap_changing, 0); } -void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma, +long uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma, unsigned long start, unsigned long len, bool enable_wp) { unsigned int mm_cp_flags; struct mmu_gather tlb; + long ret; if (enable_wp) mm_cp_flags = MM_CP_UFFD_WP; @@ -730,8 +731,10 @@ void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma, if (!enable_wp && vma_wants_manual_pte_write_upgrade(dst_vma)) mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE; tlb_gather_mmu(&tlb, dst_mm); - change_protection(&tlb, dst_vma, start, start + len, mm_cp_flags); + ret = change_protection(&tlb, dst_vma, start, start + len, mm_cp_flags); tlb_finish_mmu(&tlb); + + return ret; } int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, @@ -740,7 +743,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, { struct vm_area_struct *dst_vma; unsigned long page_mask; - int err; + long err; /* * Sanitize the command parameters: @@ -779,9 +782,12 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, goto out_unlock; } - uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp); + err = uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp); + + /* Return 0 on success, <0 on failures */ + if (err > 0) + err = 0; - err = 0; out_unlock: mmap_read_unlock(dst_mm); return err; -- cgit