summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/gup.c58
-rw-r--r--mm/internal.h3
-rw-r--r--mm/madvise.c66
3 files changed, 127 insertions, 0 deletions
diff --git a/mm/gup.c b/mm/gup.c
index 8651309f8ec3..728d996767cb 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1501,6 +1501,64 @@ long populate_vma_page_range(struct vm_area_struct *vma,
}
/*
+ * faultin_vma_page_range() - populate (prefault) page tables inside the
+ * given VMA range readable/writable
+ *
+ * This takes care of mlocking the pages, too, if VM_LOCKED is set.
+ *
+ * @vma: target vma
+ * @start: start address
+ * @end: end address
+ * @write: whether to prefault readable or writable
+ * @locked: whether the mmap_lock is still held
+ *
+ * Returns either number of processed pages in the vma, or a negative error
+ * code on error (see __get_user_pages()).
+ *
+ * vma->vm_mm->mmap_lock must be held. The range must be page-aligned and
+ * covered by the VMA.
+ *
+ * If @locked is NULL, it may be held for read or write and will be unperturbed.
+ *
+ * If @locked is non-NULL, it must held for read only and may be released. If
+ * it's released, *@locked will be set to 0.
+ */
+long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, bool write, int *locked)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long nr_pages = (end - start) / PAGE_SIZE;
+ int gup_flags;
+
+ VM_BUG_ON(!PAGE_ALIGNED(start));
+ VM_BUG_ON(!PAGE_ALIGNED(end));
+ VM_BUG_ON_VMA(start < vma->vm_start, vma);
+ VM_BUG_ON_VMA(end > vma->vm_end, vma);
+ mmap_assert_locked(mm);
+
+ /*
+ * FOLL_TOUCH: Mark page accessed and thereby young; will also mark
+ * the page dirty with FOLL_WRITE -- which doesn't make a
+ * difference with !FOLL_FORCE, because the page is writable
+ * in the page table.
+ * FOLL_HWPOISON: Return -EHWPOISON instead of -EFAULT when we hit
+ * a poisoned page.
+ * FOLL_POPULATE: Always populate memory with VM_LOCKONFAULT.
+ * !FOLL_FORCE: Require proper access permissions.
+ */
+ gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK | FOLL_HWPOISON;
+ if (write)
+ gup_flags |= FOLL_WRITE;
+
+ /*
+ * See check_vma_flags(): Will return -EFAULT on incompatible mappings
+ * or with insufficient permissions.
+ */
+ return __get_user_pages(mm, start, nr_pages, gup_flags,
+ NULL, NULL, locked);
+}
+
+/*
* __mm_populate - populate and/or mlock pages within a range of address space.
*
* This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
diff --git a/mm/internal.h b/mm/internal.h
index d9c2b33cd2ab..f38bc5d75ab7 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -345,6 +345,9 @@ void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma);
#ifdef CONFIG_MMU
extern long populate_vma_page_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *locked);
+extern long faultin_vma_page_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ bool write, int *locked);
extern void munlock_vma_pages_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
diff --git a/mm/madvise.c b/mm/madvise.c
index 63e489e5bfdb..6d3d348b17f4 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -53,6 +53,8 @@ static int madvise_need_mmap_write(int behavior)
case MADV_COLD:
case MADV_PAGEOUT:
case MADV_FREE:
+ case MADV_POPULATE_READ:
+ case MADV_POPULATE_WRITE:
return 0;
default:
/* be safe, default to 1. list exceptions explicitly */
@@ -822,6 +824,61 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
return -EINVAL;
}
+static long madvise_populate(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end,
+ int behavior)
+{
+ const bool write = behavior == MADV_POPULATE_WRITE;
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long tmp_end;
+ int locked = 1;
+ long pages;
+
+ *prev = vma;
+
+ while (start < end) {
+ /*
+ * We might have temporarily dropped the lock. For example,
+ * our VMA might have been split.
+ */
+ if (!vma || start >= vma->vm_end) {
+ vma = find_vma(mm, start);
+ if (!vma || start < vma->vm_start)
+ return -ENOMEM;
+ }
+
+ tmp_end = min_t(unsigned long, end, vma->vm_end);
+ /* Populate (prefault) page tables readable/writable. */
+ pages = faultin_vma_page_range(vma, start, tmp_end, write,
+ &locked);
+ if (!locked) {
+ mmap_read_lock(mm);
+ locked = 1;
+ *prev = NULL;
+ vma = NULL;
+ }
+ if (pages < 0) {
+ switch (pages) {
+ case -EINTR:
+ return -EINTR;
+ case -EFAULT: /* Incompatible mappings / permissions. */
+ return -EINVAL;
+ case -EHWPOISON:
+ return -EHWPOISON;
+ default:
+ pr_warn_once("%s: unhandled return value: %ld\n",
+ __func__, pages);
+ fallthrough;
+ case -ENOMEM:
+ return -ENOMEM;
+ }
+ }
+ start += pages * PAGE_SIZE;
+ }
+ return 0;
+}
+
/*
* Application wants to free up the pages and associated backing store.
* This is effectively punching a hole into the middle of a file.
@@ -935,6 +992,9 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
case MADV_FREE:
case MADV_DONTNEED:
return madvise_dontneed_free(vma, prev, start, end, behavior);
+ case MADV_POPULATE_READ:
+ case MADV_POPULATE_WRITE:
+ return madvise_populate(vma, prev, start, end, behavior);
default:
return madvise_behavior(vma, prev, start, end, behavior);
}
@@ -955,6 +1015,8 @@ madvise_behavior_valid(int behavior)
case MADV_FREE:
case MADV_COLD:
case MADV_PAGEOUT:
+ case MADV_POPULATE_READ:
+ case MADV_POPULATE_WRITE:
#ifdef CONFIG_KSM
case MADV_MERGEABLE:
case MADV_UNMERGEABLE:
@@ -1042,6 +1104,10 @@ process_madvise_behavior_valid(int behavior)
* easily if memory pressure happens.
* MADV_PAGEOUT - the application is not expected to use this memory soon,
* page out the pages in this range immediately.
+ * MADV_POPULATE_READ - populate (prefault) page tables readable by
+ * triggering read faults if required
+ * MADV_POPULATE_WRITE - populate (prefault) page tables writable by
+ * triggering write faults if required
*
* return values:
* zero - success