summary | refs | log | tree | commit | diff
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- mm/memory.c | 107
1 files changed, 74 insertions, 33 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 454ecc05ad85..a75040a47fcc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2745,8 +2745,8 @@ static inline int pte_unmap_same(struct vm_fault *vmf)
return same;
}
-static inline bool cow_user_page(struct page *dst, struct page *src,
- struct vm_fault *vmf)
+static inline bool __wp_page_copy_user(struct page *dst, struct page *src,
+ struct vm_fault *vmf)
{
bool ret;
void *kaddr;
@@ -2954,6 +2954,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
struct page *page = vmf->page;
pte_t entry;
+ VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE));
VM_BUG_ON(PageAnon(page) && !PageAnonExclusive(page));
/*
@@ -2974,7 +2975,8 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
}
/*
- * Handle the case of a page which we actually need to copy to a new page.
+ * Handle the case of a page which we actually need to copy to a new page,
+ * either due to COW or unsharing.
*
* Called with mmap_lock locked and the old page referenced, but
* without the ptl held.
@@ -2991,6 +2993,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
*/
static vm_fault_t wp_page_copy(struct vm_fault *vmf)
{
+ const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
struct vm_area_struct *vma = vmf->vma;
struct mm_struct *mm = vma->vm_mm;
struct page *old_page = vmf->page;
@@ -3013,7 +3016,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
if (!new_page)
goto oom;
- if (!cow_user_page(new_page, old_page, vmf)) {
+ if (!__wp_page_copy_user(new_page, old_page, vmf)) {
/*
* COW failed, if the fault was solved by other,
* it's fine. If not, userspace would re-fault on
@@ -3055,7 +3058,14 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
entry = mk_pte(new_page, vma->vm_page_prot);
entry = pte_sw_mkyoung(entry);
- entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ if (unlikely(unshare)) {
+ if (pte_soft_dirty(vmf->orig_pte))
+ entry = pte_mksoft_dirty(entry);
+ if (pte_uffd_wp(vmf->orig_pte))
+ entry = pte_mkuffd_wp(entry);
+ } else {
+ entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ }
/*
* Clear the pte entry and flush it first, before updating the
@@ -3072,6 +3082,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
* mmu page tables (such as kvm shadow page tables), we want the
* new page to be mapped directly into the secondary page table.
*/
+ BUG_ON(unshare && pte_write(entry));
set_pte_at_notify(mm, vmf->address, vmf->pte, entry);
update_mmu_cache(vma, vmf->address, vmf->pte);
if (old_page) {
@@ -3121,7 +3132,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
free_swap_cache(old_page);
put_page(old_page);
}
- return page_copied ? VM_FAULT_WRITE : 0;
+ return (page_copied && !unshare) ? VM_FAULT_WRITE : 0;
oom_free_new:
put_page(new_page);
oom:
@@ -3221,18 +3232,22 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf)
}
/*
- * This routine handles present pages, when users try to write
- * to a shared page. It is done by copying the page to a new address
- * and decrementing the shared-page counter for the old page.
+ * This routine handles present pages, when
+ * * users try to write to a shared page (FAULT_FLAG_WRITE)
+ * * GUP wants to take a R/O pin on a possibly shared anonymous page
+ * (FAULT_FLAG_UNSHARE)
+ *
+ * It is done by copying the page to a new address and decrementing the
+ * shared-page counter for the old page.
*
* Note that this routine assumes that the protection checks have been
* done by the caller (the low-level page fault routine in most cases).
- * Thus we can safely just mark it writable once we've done any necessary
- * COW.
+ * Thus, with FAULT_FLAG_WRITE, we can safely just mark it writable once we've
+ * done any necessary COW.
*
- * We also mark the page dirty at this point even though the page will
- * change only once the write actually happens. This avoids a few races,
- * and potentially makes it more efficient.
+ * In case of FAULT_FLAG_WRITE, we also mark the page dirty at this point even
+ * though the page will change only once the write actually happens. This
+ * avoids a few races, and potentially makes it more efficient.
*
* We enter with non-exclusive mmap_lock (to exclude vma changes,
* but allow concurrent faults), with pte both mapped and locked.
@@ -3241,23 +3256,35 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf)
static vm_fault_t do_wp_page(struct vm_fault *vmf)
__releases(vmf->ptl)
{
+ const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
struct vm_area_struct *vma = vmf->vma;
- if (userfaultfd_pte_wp(vma, *vmf->pte)) {
- pte_unmap_unlock(vmf->pte, vmf->ptl);
- return handle_userfault(vmf, VM_UFFD_WP);
- }
+ VM_BUG_ON(unshare && (vmf->flags & FAULT_FLAG_WRITE));
+ VM_BUG_ON(!unshare && !(vmf->flags & FAULT_FLAG_WRITE));
- /*
- * Userfaultfd write-protect can defer flushes. Ensure the TLB
- * is flushed in this case before copying.
- */
- if (unlikely(userfaultfd_wp(vmf->vma) &&
- mm_tlb_flush_pending(vmf->vma->vm_mm)))
- flush_tlb_page(vmf->vma, vmf->address);
+ if (likely(!unshare)) {
+ if (userfaultfd_pte_wp(vma, *vmf->pte)) {
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ return handle_userfault(vmf, VM_UFFD_WP);
+ }
+
+ /*
+ * Userfaultfd write-protect can defer flushes. Ensure the TLB
+ * is flushed in this case before copying.
+ */
+ if (unlikely(userfaultfd_wp(vmf->vma) &&
+ mm_tlb_flush_pending(vmf->vma->vm_mm)))
+ flush_tlb_page(vmf->vma, vmf->address);
+ }
vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
if (!vmf->page) {
+ if (unlikely(unshare)) {
+ /* No anonymous page -> nothing to do. */
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ return 0;
+ }
+
/*
* VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
* VM_PFNMAP VMA.
@@ -3320,8 +3347,16 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
page_move_anon_rmap(page, vma);
unlock_page(page);
reuse:
+ if (unlikely(unshare)) {
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ return 0;
+ }
wp_page_reuse(vmf);
return VM_FAULT_WRITE;
+ } else if (unshare) {
+ /* No anonymous page -> nothing to do. */
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ return 0;
} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
(VM_WRITE|VM_SHARED))) {
return wp_page_shared(vmf);
@@ -4523,8 +4558,11 @@ static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
/* `inline' is required to avoid gcc 4.1.2 build error */
static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
{
+ const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
+
if (vma_is_anonymous(vmf->vma)) {
- if (userfaultfd_huge_pmd_wp(vmf->vma, vmf->orig_pmd))
+ if (likely(!unshare) &&
+ userfaultfd_huge_pmd_wp(vmf->vma, vmf->orig_pmd))
return handle_userfault(vmf, VM_UFFD_WP);
return do_huge_pmd_wp_page(vmf);
}
@@ -4659,10 +4697,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
update_mmu_tlb(vmf->vma, vmf->address, vmf->pte);
goto unlock;
}
- if (vmf->flags & FAULT_FLAG_WRITE) {
+ if (vmf->flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
if (!pte_write(entry))
return do_wp_page(vmf);
- entry = pte_mkdirty(entry);
+ else if (likely(vmf->flags & FAULT_FLAG_WRITE))
+ entry = pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry,
@@ -4703,7 +4742,6 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
.pgoff = linear_page_index(vma, address),
.gfp_mask = __get_fault_gfp_mask(vma),
};
- unsigned int dirty = flags & FAULT_FLAG_WRITE;
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
p4d_t *p4d;
@@ -4728,9 +4766,11 @@ retry_pud:
barrier();
if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) {
- /* NUMA case for anonymous PUDs would go here */
-
- if (dirty && !pud_write(orig_pud)) {
+ /*
+ * TODO once we support anonymous PUDs: NUMA case and
+ * FAULT_FLAG_UNSHARE handling.
+ */
+ if ((flags & FAULT_FLAG_WRITE) && !pud_write(orig_pud)) {
ret = wp_huge_pud(&vmf, orig_pud);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
@@ -4768,7 +4808,8 @@ retry_pud:
if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma))
return do_huge_pmd_numa_page(&vmf);
- if (dirty && !pmd_write(vmf.orig_pmd)) {
+ if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
+ !pmd_write(vmf.orig_pmd)) {
ret = wp_huge_pmd(&vmf);
if (!(ret & VM_FAULT_FALLBACK))
return ret;