summaryrefslogtreecommitdiff
path: root/mm/userfaultfd.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/userfaultfd.c')
-rw-r--r--mm/userfaultfd.c105
1 files changed, 92 insertions, 13 deletions
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 1b0d7abad1d4..512576e171ce 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -53,7 +53,8 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
unsigned long src_addr,
- struct page **pagep)
+ struct page **pagep,
+ bool wp_copy)
{
struct mem_cgroup *memcg;
pte_t _dst_pte, *dst_pte;
@@ -99,9 +100,13 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
if (mem_cgroup_try_charge(page, dst_mm, GFP_KERNEL, &memcg, false))
goto out_release;
- _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
- if (dst_vma->vm_flags & VM_WRITE)
- _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
+ _dst_pte = pte_mkdirty(mk_pte(page, dst_vma->vm_page_prot));
+ if (dst_vma->vm_flags & VM_WRITE) {
+ if (wp_copy)
+ _dst_pte = pte_mkuffd_wp(_dst_pte);
+ else
+ _dst_pte = pte_mkwrite(_dst_pte);
+ }
dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
if (dst_vma->vm_file) {
@@ -276,10 +281,14 @@ retry:
BUG_ON(dst_addr >= dst_start + len);
/*
- * Serialize via hugetlb_fault_mutex
+ * Serialize via i_mmap_rwsem and hugetlb_fault_mutex.
+ * i_mmap_rwsem ensures the dst_pte remains valid even
+ * in the case of shared pmds. fault mutex prevents
+ * races with other faulting threads.
*/
- idx = linear_page_index(dst_vma, dst_addr);
mapping = dst_vma->vm_file->f_mapping;
+ i_mmap_lock_read(mapping);
+ idx = linear_page_index(dst_vma, dst_addr);
hash = hugetlb_fault_mutex_hash(mapping, idx);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
@@ -287,6 +296,7 @@ retry:
dst_pte = huge_pte_alloc(dst_mm, dst_addr, vma_hpagesize);
if (!dst_pte) {
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ i_mmap_unlock_read(mapping);
goto out_unlock;
}
@@ -294,6 +304,7 @@ retry:
dst_pteval = huge_ptep_get(dst_pte);
if (!huge_pte_none(dst_pteval)) {
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ i_mmap_unlock_read(mapping);
goto out_unlock;
}
@@ -301,6 +312,7 @@ retry:
dst_addr, src_addr, &page);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ i_mmap_unlock_read(mapping);
vm_alloc_shared = vm_shared;
cond_resched();
@@ -408,7 +420,8 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
unsigned long dst_addr,
unsigned long src_addr,
struct page **page,
- bool zeropage)
+ bool zeropage,
+ bool wp_copy)
{
ssize_t err;
@@ -425,11 +438,13 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
if (!(dst_vma->vm_flags & VM_SHARED)) {
if (!zeropage)
err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
- dst_addr, src_addr, page);
+ dst_addr, src_addr, page,
+ wp_copy);
else
err = mfill_zeropage_pte(dst_mm, dst_pmd,
dst_vma, dst_addr);
} else {
+ VM_WARN_ON_ONCE(wp_copy);
if (!zeropage)
err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd,
dst_vma, dst_addr,
@@ -447,7 +462,8 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
unsigned long src_start,
unsigned long len,
bool zeropage,
- bool *mmap_changing)
+ bool *mmap_changing,
+ __u64 mode)
{
struct vm_area_struct *dst_vma;
ssize_t err;
@@ -455,6 +471,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
unsigned long src_addr, dst_addr;
long copied;
struct page *page;
+ bool wp_copy;
/*
* Sanitize the command parameters:
@@ -501,6 +518,14 @@ retry:
goto out_unlock;
/*
+ * validate 'mode' now that we know the dst_vma: don't allow
+ * a wrprotect copy if the userfaultfd didn't register as WP.
+ */
+ wp_copy = mode & UFFDIO_COPY_MODE_WP;
+ if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
+ goto out_unlock;
+
+ /*
* If this is a HUGETLB vma, pass off to appropriate routine
*/
if (is_vm_hugetlb_page(dst_vma))
@@ -555,7 +580,7 @@ retry:
BUG_ON(pmd_trans_huge(*dst_pmd));
err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
- src_addr, &page, zeropage);
+ src_addr, &page, zeropage, wp_copy);
cond_resched();
if (unlikely(err == -ENOENT)) {
@@ -602,14 +627,68 @@ out:
ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
unsigned long src_start, unsigned long len,
- bool *mmap_changing)
+ bool *mmap_changing, __u64 mode)
{
return __mcopy_atomic(dst_mm, dst_start, src_start, len, false,
- mmap_changing);
+ mmap_changing, mode);
}
ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
unsigned long len, bool *mmap_changing)
{
- return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing);
+ return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing, 0);
+}
+
+int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
+ unsigned long len, bool enable_wp, bool *mmap_changing)
+{
+ struct vm_area_struct *dst_vma;
+ pgprot_t newprot;
+ int err;
+
+ /*
+ * Sanitize the command parameters:
+ */
+ BUG_ON(start & ~PAGE_MASK);
+ BUG_ON(len & ~PAGE_MASK);
+
+ /* Does the address range wrap, or is the span zero-sized? */
+ BUG_ON(start + len <= start);
+
+ down_read(&dst_mm->mmap_sem);
+
+ /*
+ * If memory mappings are changing because of non-cooperative
+ * operation (e.g. mremap) running in parallel, bail out and
+ * request the user to retry later
+ */
+ err = -EAGAIN;
+ if (mmap_changing && READ_ONCE(*mmap_changing))
+ goto out_unlock;
+
+ err = -ENOENT;
+ dst_vma = find_dst_vma(dst_mm, start, len);
+ /*
+ * Make sure the vma is not shared, that the dst range is
+ * both valid and fully within a single existing vma.
+ */
+ if (!dst_vma || (dst_vma->vm_flags & VM_SHARED))
+ goto out_unlock;
+ if (!userfaultfd_wp(dst_vma))
+ goto out_unlock;
+ if (!vma_is_anonymous(dst_vma))
+ goto out_unlock;
+
+ if (enable_wp)
+ newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
+ else
+ newprot = vm_get_page_prot(dst_vma->vm_flags);
+
+ change_protection(dst_vma, start, start + len, newprot,
+ enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
+
+ err = 0;
+out_unlock:
+ up_read(&dst_mm->mmap_sem);
+ return err;
}