summaryrefslogtreecommitdiff
path: root/mm/mprotect.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mprotect.c')
-rw-r--r--mm/mprotect.c161
1 files changed, 97 insertions, 64 deletions
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 61cf60015a8b..1d4843c97c2a 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -80,13 +80,13 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
return pte_dirty(pte);
}
-static unsigned long change_pte_range(struct mmu_gather *tlb,
+static long change_pte_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
pte_t *pte, oldpte;
spinlock_t *ptl;
- unsigned long pages = 0;
+ long pages = 0;
int target_node = NUMA_NO_NODE;
bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
@@ -177,12 +177,10 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
oldpte = ptep_modify_prot_start(vma, addr, pte);
ptent = pte_modify(oldpte, newprot);
- if (uffd_wp) {
- ptent = pte_wrprotect(ptent);
+ if (uffd_wp)
ptent = pte_mkuffd_wp(ptent);
- } else if (uffd_wp_resolve) {
+ else if (uffd_wp_resolve)
ptent = pte_clear_uffd_wp(ptent);
- }
/*
* In some writable, shared mappings, we might want
@@ -332,36 +330,42 @@ uffd_wp_protect_file(struct vm_area_struct *vma, unsigned long cp_flags)
/*
* If wr-protecting the range for file-backed, populate pgtable for the case
* when pgtable is empty but page cache exists. When {pte|pmd|...}_alloc()
- * failed it means no memory, we don't have a better option but stop.
+ * failed we treat it the same way as pgtable allocation failures during
+ * page faults by kicking OOM and returning error.
*/
#define change_pmd_prepare(vma, pmd, cp_flags) \
- do { \
+ ({ \
+ long err = 0; \
if (unlikely(uffd_wp_protect_file(vma, cp_flags))) { \
- if (WARN_ON_ONCE(pte_alloc(vma->vm_mm, pmd))) \
- break; \
+ if (pte_alloc(vma->vm_mm, pmd)) \
+ err = -ENOMEM; \
} \
- } while (0)
+ err; \
+ })
+
/*
* This is the general pud/p4d/pgd version of change_pmd_prepare(). We need to
* have separate change_pmd_prepare() because pte_alloc() returns 0 on success,
* while {pmd|pud|p4d}_alloc() returns the valid pointer on success.
*/
#define change_prepare(vma, high, low, addr, cp_flags) \
- do { \
+ ({ \
+ long err = 0; \
if (unlikely(uffd_wp_protect_file(vma, cp_flags))) { \
low##_t *p = low##_alloc(vma->vm_mm, high, addr); \
- if (WARN_ON_ONCE(p == NULL)) \
- break; \
+ if (p == NULL) \
+ err = -ENOMEM; \
} \
- } while (0)
+ err; \
+ })
-static inline unsigned long change_pmd_range(struct mmu_gather *tlb,
+static inline long change_pmd_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pud_t *pud, unsigned long addr,
unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
pmd_t *pmd;
unsigned long next;
- unsigned long pages = 0;
+ long pages = 0;
unsigned long nr_huge_updates = 0;
struct mmu_notifier_range range;
@@ -369,11 +373,15 @@ static inline unsigned long change_pmd_range(struct mmu_gather *tlb,
pmd = pmd_offset(pud, addr);
do {
- unsigned long this_pages;
+ long ret;
next = pmd_addr_end(addr, end);
- change_pmd_prepare(vma, pmd, cp_flags);
+ ret = change_pmd_prepare(vma, pmd, cp_flags);
+ if (ret) {
+ pages = ret;
+ break;
+ }
/*
* Automatic NUMA balancing walks the tables with mmap_lock
* held for read. It's possible a parallel update to occur
@@ -390,7 +398,7 @@ static inline unsigned long change_pmd_range(struct mmu_gather *tlb,
if (!range.start) {
mmu_notifier_range_init(&range,
MMU_NOTIFY_PROTECTION_VMA, 0,
- vma, vma->vm_mm, addr, end);
+ vma->vm_mm, addr, end);
mmu_notifier_invalidate_range_start(&range);
}
@@ -403,7 +411,11 @@ static inline unsigned long change_pmd_range(struct mmu_gather *tlb,
* cleared; make sure pmd populated if
* necessary, then fall-through to pte level.
*/
- change_pmd_prepare(vma, pmd, cp_flags);
+ ret = change_pmd_prepare(vma, pmd, cp_flags);
+ if (ret) {
+ pages = ret;
+ break;
+ }
} else {
/*
* change_huge_pmd() does not defer TLB flushes,
@@ -424,9 +436,8 @@ static inline unsigned long change_pmd_range(struct mmu_gather *tlb,
}
/* fall through, the trans huge pmd just split */
}
- this_pages = change_pte_range(tlb, vma, pmd, addr, next,
- newprot, cp_flags);
- pages += this_pages;
+ pages += change_pte_range(tlb, vma, pmd, addr, next,
+ newprot, cp_flags);
next:
cond_resched();
} while (pmd++, addr = next, addr != end);
@@ -439,18 +450,20 @@ next:
return pages;
}
-static inline unsigned long change_pud_range(struct mmu_gather *tlb,
+static inline long change_pud_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr,
unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
pud_t *pud;
unsigned long next;
- unsigned long pages = 0;
+ long pages = 0, ret;
pud = pud_offset(p4d, addr);
do {
next = pud_addr_end(addr, end);
- change_prepare(vma, pud, pmd, addr, cp_flags);
+ ret = change_prepare(vma, pud, pmd, addr, cp_flags);
+ if (ret)
+ return ret;
if (pud_none_or_clear_bad(pud))
continue;
pages += change_pmd_range(tlb, vma, pud, addr, next, newprot,
@@ -460,18 +473,20 @@ static inline unsigned long change_pud_range(struct mmu_gather *tlb,
return pages;
}
-static inline unsigned long change_p4d_range(struct mmu_gather *tlb,
+static inline long change_p4d_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr,
unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
p4d_t *p4d;
unsigned long next;
- unsigned long pages = 0;
+ long pages = 0, ret;
p4d = p4d_offset(pgd, addr);
do {
next = p4d_addr_end(addr, end);
- change_prepare(vma, p4d, pud, addr, cp_flags);
+ ret = change_prepare(vma, p4d, pud, addr, cp_flags);
+ if (ret)
+ return ret;
if (p4d_none_or_clear_bad(p4d))
continue;
pages += change_pud_range(tlb, vma, p4d, addr, next, newprot,
@@ -481,21 +496,25 @@ static inline unsigned long change_p4d_range(struct mmu_gather *tlb,
return pages;
}
-static unsigned long change_protection_range(struct mmu_gather *tlb,
+static long change_protection_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long addr,
unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
unsigned long next;
- unsigned long pages = 0;
+ long pages = 0, ret;
BUG_ON(addr >= end);
pgd = pgd_offset(mm, addr);
tlb_start_vma(tlb, vma);
do {
next = pgd_addr_end(addr, end);
- change_prepare(vma, pgd, p4d, addr, cp_flags);
+ ret = change_prepare(vma, pgd, p4d, addr, cp_flags);
+ if (ret) {
+ pages = ret;
+ break;
+ }
if (pgd_none_or_clear_bad(pgd))
continue;
pages += change_p4d_range(tlb, vma, pgd, addr, next, newprot,
@@ -507,15 +526,27 @@ static unsigned long change_protection_range(struct mmu_gather *tlb,
return pages;
}
-unsigned long change_protection(struct mmu_gather *tlb,
+long change_protection(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start,
- unsigned long end, pgprot_t newprot,
- unsigned long cp_flags)
+ unsigned long end, unsigned long cp_flags)
{
- unsigned long pages;
+ pgprot_t newprot = vma->vm_page_prot;
+ long pages;
BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL);
+#ifdef CONFIG_NUMA_BALANCING
+ /*
+ * Ordinary protection updates (mprotect, uffd-wp, softdirty tracking)
+ * are expected to reflect their requirements via VMA flags such that
+ * vma_set_page_prot() will adjust vma->vm_page_prot accordingly.
+ */
+ if (cp_flags & MM_CP_PROT_NUMA)
+ newprot = PAGE_NONE;
+#else
+ WARN_ON_ONCE(cp_flags & MM_CP_PROT_NUMA);
+#endif
+
if (is_vm_hugetlb_page(vma))
pages = hugetlb_change_protection(vma, start, end, newprot,
cp_flags);
@@ -554,9 +585,9 @@ static const struct mm_walk_ops prot_none_walk_ops = {
};
int
-mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma,
- struct vm_area_struct **pprev, unsigned long start,
- unsigned long end, unsigned long newflags)
+mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
+ struct vm_area_struct *vma, struct vm_area_struct **pprev,
+ unsigned long start, unsigned long end, unsigned long newflags)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long oldflags = vma->vm_flags;
@@ -611,7 +642,7 @@ mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma,
* First try to merge with previous and/or next vma.
*/
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
- *pprev = vma_merge(mm, *pprev, start, end, newflags,
+ *pprev = vma_merge(vmi, mm, *pprev, start, end, newflags,
vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
vma->vm_userfaultfd_ctx, anon_vma_name(vma));
if (*pprev) {
@@ -623,13 +654,13 @@ mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma,
*pprev = vma;
if (start != vma->vm_start) {
- error = split_vma(mm, vma, start, 1);
+ error = split_vma(vmi, vma, start, 1);
if (error)
goto fail;
}
if (end != vma->vm_end) {
- error = split_vma(mm, vma, end, 0);
+ error = split_vma(vmi, vma, end, 0);
if (error)
goto fail;
}
@@ -639,12 +670,12 @@ success:
* vm_flags and vm_page_prot are protected by the mmap_lock
* held in write mode.
*/
- vma->vm_flags = newflags;
+ vm_flags_reset(vma, newflags);
if (vma_wants_manual_pte_write_upgrade(vma))
mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
vma_set_page_prot(vma);
- change_protection(tlb, vma, start, end, vma->vm_page_prot, mm_cp_flags);
+ change_protection(tlb, vma, start, end, mm_cp_flags);
/*
* Private VM_LOCKED VMA becoming writable: trigger COW to avoid major
@@ -678,7 +709,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
(prot & PROT_READ);
struct mmu_gather tlb;
- MA_STATE(mas, &current->mm->mm_mt, 0, 0);
+ struct vma_iterator vmi;
start = untagged_addr(start);
@@ -710,8 +741,8 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
if ((pkey != -1) && !mm_pkey_is_allocated(current->mm, pkey))
goto out;
- mas_set(&mas, start);
- vma = mas_find(&mas, ULONG_MAX);
+ vma_iter_init(&vmi, current->mm, start);
+ vma = vma_find(&vmi, end);
error = -ENOMEM;
if (!vma)
goto out;
@@ -734,18 +765,22 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
}
}
+ prev = vma_prev(&vmi);
if (start > vma->vm_start)
prev = vma;
- else
- prev = mas_prev(&mas, 0);
tlb_gather_mmu(&tlb, current->mm);
- for (nstart = start ; ; ) {
+ nstart = start;
+ tmp = vma->vm_start;
+ for_each_vma_range(vmi, vma, end) {
unsigned long mask_off_old_flags;
unsigned long newflags;
int new_vma_pkey;
- /* Here we know that vma->vm_start <= nstart < vma->vm_end. */
+ if (vma->vm_start != tmp) {
+ error = -ENOMEM;
+ break;
+ }
/* Does the application expect PROT_READ to imply PROT_EXEC */
if (rier && (vma->vm_flags & VM_MAYEXEC))
@@ -768,6 +803,11 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
break;
}
+ if (map_deny_write_exec(vma, newflags)) {
+ error = -EACCES;
+ goto out;
+ }
+
/* Allow architectures to sanity-check the new flags */
if (!arch_validate_flags(newflags)) {
error = -EINVAL;
@@ -788,25 +828,18 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
break;
}
- error = mprotect_fixup(&tlb, vma, &prev, nstart, tmp, newflags);
+ error = mprotect_fixup(&vmi, &tlb, vma, &prev, nstart, tmp, newflags);
if (error)
break;
nstart = tmp;
-
- if (nstart < prev->vm_end)
- nstart = prev->vm_end;
- if (nstart >= end)
- break;
-
- vma = find_vma(current->mm, prev->vm_end);
- if (!vma || vma->vm_start != nstart) {
- error = -ENOMEM;
- break;
- }
prot = reqprot;
}
tlb_finish_mmu(&tlb);
+
+ if (vma_iter_end(&vmi) < end)
+ error = -ENOMEM;
+
out:
mmap_write_unlock(current->mm);
return error;