diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 104 |
1 files changed, 76 insertions, 28 deletions
diff --git a/mm/memory.c b/mm/memory.c index 0bccc622e482..19874d133a66 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -733,6 +733,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte = swp_entry_to_pte(entry); if (pte_swp_soft_dirty(*src_pte)) pte = pte_swp_mksoft_dirty(pte); + if (pte_swp_uffd_wp(*src_pte)) + pte = pte_swp_mkuffd_wp(pte); set_pte_at(src_mm, addr, src_pte, pte); } } else if (is_device_private_entry(entry)) { @@ -762,6 +764,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, is_cow_mapping(vm_flags)) { make_device_private_entry_read(&entry); pte = swp_entry_to_pte(entry); + if (pte_swp_uffd_wp(*src_pte)) + pte = pte_swp_mkuffd_wp(pte); set_pte_at(src_mm, addr, src_pte, pte); } } @@ -785,6 +789,14 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte = pte_mkclean(pte); pte = pte_mkold(pte); + /* + * Make sure the _PAGE_UFFD_WP bit is cleared if the new VMA + * does not have the VM_UFFD_WP, which means that the uffd + * fork event is not enabled. + */ + if (!(vm_flags & VM_UFFD_WP)) + pte = pte_clear_uffd_wp(pte); + page = vm_normal_page(vma, addr, pte); if (page) { get_page(page); @@ -1939,8 +1951,8 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, * remap_pfn_range - remap kernel memory to userspace * @vma: user vma to map to * @addr: target user address to start at - * @pfn: physical address of kernel memory - * @size: size of map area + * @pfn: page frame number of kernel physical memory address + * @size: size of mapping area * @prot: page protection flags for this mapping * * Note: this is only safe if the mm semaphore is held when called. @@ -2009,7 +2021,7 @@ EXPORT_SYMBOL(remap_pfn_range); /** * vm_iomap_memory - remap memory to userspace * @vma: user vma to map to - * @start: start of area + * @start: start of the physical memory to be mapped * @len: size of area * * This is a simplified io_remap_pfn_range() for common driver use. The @@ -2257,7 +2269,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src, bool ret; void *kaddr; void __user *uaddr; - bool force_mkyoung; + bool locked = false; struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; unsigned long addr = vmf->address; @@ -2282,11 +2294,11 @@ static inline bool cow_user_page(struct page *dst, struct page *src, * On architectures with software "accessed" bits, we would * take a double page fault, so mark it accessed here. */ - force_mkyoung = arch_faults_on_old_pte() && !pte_young(vmf->orig_pte); - if (force_mkyoung) { + if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) { pte_t entry; vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); + locked = true; if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { /* * Other thread has already handled the fault @@ -2310,18 +2322,37 @@ static inline bool cow_user_page(struct page *dst, struct page *src, * zeroes. */ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { + if (locked) + goto warn; + + /* Re-validate under PTL if the page is still mapped */ + vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); + locked = true; + if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { + /* The PTE changed under us. Retry page fault. */ + ret = false; + goto pte_unlock; + } + /* - * Give a warn in case there can be some obscure - * use-case + * The same page can be mapped back since last copy attampt. + * Try to copy again under PTL. */ - WARN_ON_ONCE(1); - clear_page(kaddr); + if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { + /* + * Give a warn in case there can be some obscure + * use-case + */ +warn: + WARN_ON_ONCE(1); + clear_page(kaddr); + } } ret = true; pte_unlock: - if (force_mkyoung) + if (locked) pte_unmap_unlock(vmf->pte, vmf->ptl); kunmap_atomic(kaddr); flush_dcache_page(dst); @@ -2733,6 +2764,11 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; + if (userfaultfd_pte_wp(vma, *vmf->pte)) { + pte_unmap_unlock(vmf->pte, vmf->ptl); + return handle_userfault(vmf, VM_UFFD_WP); + } + vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); if (!vmf->page) { /* @@ -3066,6 +3102,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) flush_icache_page(vma, page); if (pte_swp_soft_dirty(vmf->orig_pte)) pte = pte_mksoft_dirty(pte); + if (pte_swp_uffd_wp(vmf->orig_pte)) { + pte = pte_mkuffd_wp(pte); + pte = pte_wrprotect(pte); + } set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); vmf->orig_pte = pte; @@ -3354,7 +3394,7 @@ map_pte: return 0; } -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE +#ifdef CONFIG_TRANSPARENT_HUGEPAGE static void deposit_prealloc_pte(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -3456,8 +3496,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, pte_t entry; vm_fault_t ret; - if (pmd_none(*vmf->pmd) && PageTransCompound(page) && - IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) { + if (pmd_none(*vmf->pmd) && PageTransCompound(page)) { /* THP on COW? */ VM_BUG_ON_PAGE(memcg, page); @@ -3930,31 +3969,40 @@ static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) /* `inline' is required to avoid gcc 4.1.2 build error */ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd) { - if (vma_is_anonymous(vmf->vma)) + if (vma_is_anonymous(vmf->vma)) { + if (userfaultfd_huge_pmd_wp(vmf->vma, orig_pmd)) + return handle_userfault(vmf, VM_UFFD_WP); return do_huge_pmd_wp_page(vmf, orig_pmd); - if (vmf->vma->vm_ops->huge_fault) - return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); + } + if (vmf->vma->vm_ops->huge_fault) { + vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); + + if (!(ret & VM_FAULT_FALLBACK)) + return ret; + } - /* COW handled on pte level: split pmd */ - VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma); + /* COW or write-notify handled on pte level: split pmd. */ __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL); return VM_FAULT_FALLBACK; } -static inline bool vma_is_accessible(struct vm_area_struct *vma) -{ - return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE); -} - static vm_fault_t create_huge_pud(struct vm_fault *vmf) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ + defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) /* No support for anonymous transparent PUD pages yet */ if (vma_is_anonymous(vmf->vma)) - return VM_FAULT_FALLBACK; - if (vmf->vma->vm_ops->huge_fault) - return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); + goto split; + if (vmf->vma->vm_ops->huge_fault) { + vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); + + if (!(ret & VM_FAULT_FALLBACK)) + return ret; + } +split: + /* COW or write-notify not handled on PUD level: split pud.*/ + __split_huge_pud(vmf->vma, vmf->pud, vmf->address); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ return VM_FAULT_FALLBACK; } |