summaryrefslogtreecommitdiff
path: root/mm/mprotect.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mprotect.c')
-rw-r--r--mm/mprotect.c109
1 files changed, 66 insertions, 43 deletions
diff --git a/mm/mprotect.c b/mm/mprotect.c
index f8a4544b4601..88608d0dc2c2 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -32,6 +32,7 @@
#include <linux/sched/sysctl.h>
#include <linux/userfaultfd_k.h>
#include <linux/memory-tiers.h>
+#include <uapi/linux/mman.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -52,7 +53,7 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
return false;
/* Do we need write faults for softdirty tracking? */
- if (vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte))
+ if (pte_needs_soft_dirty_wp(vma, pte))
return false;
/* Do we need write faults for uffd-wp tracking? */
@@ -70,6 +71,8 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
return page && PageAnon(page) && PageAnonExclusive(page);
}
+ VM_WARN_ON_ONCE(is_zero_pfn(pte_pfn(pte)) && pte_dirty(pte));
+
/*
* Writable MAP_SHARED mapping: "clean" might indicate that the FS still
* needs a real write-fault for writenotify
@@ -129,7 +132,8 @@ static long change_pte_range(struct mmu_gather *tlb,
/* Also skip shared copy-on-write pages */
if (is_cow_mapping(vma->vm_flags) &&
- folio_ref_count(folio) != 1)
+ (folio_maybe_dma_pinned(folio) ||
+ folio_maybe_mapped_shared(folio)))
continue;
/*
@@ -157,8 +161,7 @@ static long change_pte_range(struct mmu_gather *tlb,
if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
toptier)
continue;
- if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING &&
- !toptier)
+ if (folio_use_access_time(folio))
folio_xchg_access_time(folio,
jiffies_to_msecs(jiffies));
}
@@ -222,20 +225,14 @@ static long change_pte_range(struct mmu_gather *tlb,
newpte = swp_entry_to_pte(entry);
if (pte_swp_uffd_wp(oldpte))
newpte = pte_swp_mkuffd_wp(newpte);
- } else if (is_writable_device_exclusive_entry(entry)) {
- entry = make_readable_device_exclusive_entry(
- swp_offset(entry));
- newpte = swp_entry_to_pte(entry);
- if (pte_swp_soft_dirty(oldpte))
- newpte = pte_swp_mksoft_dirty(newpte);
- if (pte_swp_uffd_wp(oldpte))
- newpte = pte_swp_mkuffd_wp(newpte);
} else if (is_pte_marker_entry(entry)) {
/*
* Ignore error swap entries unconditionally,
- * because any access should sigbus anyway.
+ * because any access should sigbus/sigsegv
+ * anyway.
*/
- if (is_poisoned_swp_entry(entry))
+ if (is_poisoned_swp_entry(entry) ||
+ is_guard_swp_entry(entry))
continue;
/*
* If this is uffd-wp pte marker and we'd like
@@ -299,8 +296,9 @@ pgtable_split_needed(struct vm_area_struct *vma, unsigned long cp_flags)
{
/*
* pte markers only resides in pte level, if we need pte markers,
- * we need to split. We cannot wr-protect shmem thp because file
- * thp is handled differently when split by erasing the pmd so far.
+ * we need to split. For example, we cannot wr-protect a file thp
+ * (e.g. 2M shmem) because file thp is handled differently when
+ * split by erasing the pmd so far.
*/
return (cp_flags & MM_CP_UFFD_WP) && !vma_is_anonymous(vma);
}
@@ -360,9 +358,6 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
unsigned long next;
long pages = 0;
unsigned long nr_huge_updates = 0;
- struct mmu_notifier_range range;
-
- range.start = 0;
pmd = pmd_offset(pud, addr);
do {
@@ -380,19 +375,11 @@ again:
if (pmd_none(*pmd))
goto next;
- /* invoke the mmu notifier if the pmd is populated */
- if (!range.start) {
- mmu_notifier_range_init(&range,
- MMU_NOTIFY_PROTECTION_VMA, 0,
- vma->vm_mm, addr, end);
- mmu_notifier_invalidate_range_start(&range);
- }
-
_pmd = pmdp_get_lockless(pmd);
if (is_swap_pmd(_pmd) || pmd_trans_huge(_pmd) || pmd_devmap(_pmd)) {
if ((next - addr != HPAGE_PMD_SIZE) ||
pgtable_split_needed(vma, cp_flags)) {
- __split_huge_pmd(vma, pmd, addr, false, NULL);
+ __split_huge_pmd(vma, pmd, addr, false);
/*
* For file-backed, the pmd could have been
* cleared; make sure pmd populated if
@@ -428,9 +415,6 @@ next:
cond_resched();
} while (pmd++, addr = next, addr != end);
- if (range.start)
- mmu_notifier_invalidate_range_end(&range);
-
if (nr_huge_updates)
count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
return pages;
@@ -440,21 +424,57 @@ static inline long change_pud_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr,
unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
- pud_t *pud;
+ struct mmu_notifier_range range;
+ pud_t *pudp, pud;
unsigned long next;
long pages = 0, ret;
- pud = pud_offset(p4d, addr);
+ range.start = 0;
+
+ pudp = pud_offset(p4d, addr);
do {
+again:
next = pud_addr_end(addr, end);
- ret = change_prepare(vma, pud, pmd, addr, cp_flags);
- if (ret)
- return ret;
- if (pud_none_or_clear_bad(pud))
+ ret = change_prepare(vma, pudp, pmd, addr, cp_flags);
+ if (ret) {
+ pages = ret;
+ break;
+ }
+
+ pud = READ_ONCE(*pudp);
+ if (pud_none(pud))
continue;
- pages += change_pmd_range(tlb, vma, pud, addr, next, newprot,
+
+ if (!range.start) {
+ mmu_notifier_range_init(&range,
+ MMU_NOTIFY_PROTECTION_VMA, 0,
+ vma->vm_mm, addr, end);
+ mmu_notifier_invalidate_range_start(&range);
+ }
+
+ if (pud_leaf(pud)) {
+ if ((next - addr != PUD_SIZE) ||
+ pgtable_split_needed(vma, cp_flags)) {
+ __split_huge_pud(vma, pudp, addr);
+ goto again;
+ } else {
+ ret = change_huge_pud(tlb, vma, pudp,
+ addr, newprot, cp_flags);
+ if (ret == 0)
+ goto again;
+ /* huge pud was handled */
+ if (ret == HPAGE_PUD_NR)
+ pages += HPAGE_PUD_NR;
+ continue;
+ }
+ }
+
+ pages += change_pmd_range(tlb, vma, pudp, addr, next, newprot,
cp_flags);
- } while (pud++, addr = next, addr != end);
+ } while (pudp++, addr = next, addr != end);
+
+ if (range.start)
+ mmu_notifier_invalidate_range_end(&range);
return pages;
}
@@ -579,12 +599,15 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
unsigned long start, unsigned long end, unsigned long newflags)
{
struct mm_struct *mm = vma->vm_mm;
- unsigned long oldflags = vma->vm_flags;
+ unsigned long oldflags = READ_ONCE(vma->vm_flags);
long nrpages = (end - start) >> PAGE_SHIFT;
unsigned int mm_cp_flags = 0;
unsigned long charged = 0;
int error;
+ if (!can_modify_vma(vma))
+ return -EPERM;
+
if (newflags == oldflags) {
*pprev = vma;
return 0;
@@ -596,7 +619,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
* uncommon case, so doesn't need to be very optimized.
*/
if (arch_has_pfn_modify_check() &&
- (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
+ (oldflags & (VM_PFNMAP|VM_MIXEDMAP)) &&
(newflags & VM_ACCESS_FLAGS) == 0) {
pgprot_t new_pgprot = vm_get_page_prot(newflags);
@@ -645,7 +668,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
* held in write mode.
*/
vma_start_write(vma);
- vm_flags_reset(vma, newflags);
+ vm_flags_reset_once(vma, newflags);
if (vma_wants_manual_pte_write_upgrade(vma))
mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
vma_set_page_prot(vma);
@@ -781,7 +804,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
break;
}
- if (map_deny_write_exec(vma, newflags)) {
+ if (map_deny_write_exec(vma->vm_flags, newflags)) {
error = -EACCES;
break;
}