summary | refs | log | tree | commit | diff
path: root/include/linux/mm.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/mm.h')
-rw-r--r-- include/linux/mm.h 542
1 files changed, 265 insertions, 277 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 716d30d93616..1f79667824eb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -282,7 +282,12 @@ extern unsigned int kobjsize(const void *objp);
#define VM_MAYSHARE 0x00000080
#define VM_GROWSDOWN 0x00000100 /* general info on the segment */
+#ifdef CONFIG_MMU
#define VM_UFFD_MISSING 0x00000200 /* missing pages tracking */
+#else /* CONFIG_MMU */
+#define VM_MAYOVERLAY 0x00000200 /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
+#define VM_UFFD_MISSING 0
+#endif /* CONFIG_MMU */
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
#define VM_UFFD_WP 0x00001000 /* wrprotect pages tracking */
@@ -422,8 +427,8 @@ extern unsigned int kobjsize(const void *objp);
/* This mask defines which mm->def_flags a process can inherit its parent */
#define VM_INIT_DEF_MASK VM_NOHUGEPAGE
-/* This mask is used to clear all the VMA flags used by mlock */
-#define VM_LOCKED_CLEAR_MASK (~(VM_LOCKED | VM_LOCKONFAULT))
+/* This mask represents all the VMA flag bits used by mlock */
+#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT)
/* Arch-specific flags to clear when updating VM flags on protection change */
#ifndef VM_ARCH_CLEAR
@@ -628,6 +633,63 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
INIT_LIST_HEAD(&vma->anon_vma_chain);
}
+/* Use when VMA is not part of the VMA tree and needs no locking */
+static inline void vm_flags_init(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ ACCESS_PRIVATE(vma, __vm_flags) = flags;
+}
+
+/* Use when VMA is part of the VMA tree and modifications need coordination */
+static inline void vm_flags_reset(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ mmap_assert_write_locked(vma->vm_mm);
+ vm_flags_init(vma, flags);
+}
+
+static inline void vm_flags_reset_once(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ mmap_assert_write_locked(vma->vm_mm);
+ WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags);
+}
+
+static inline void vm_flags_set(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ mmap_assert_write_locked(vma->vm_mm);
+ ACCESS_PRIVATE(vma, __vm_flags) |= flags;
+}
+
+static inline void vm_flags_clear(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ mmap_assert_write_locked(vma->vm_mm);
+ ACCESS_PRIVATE(vma, __vm_flags) &= ~flags;
+}
+
+/*
+ * Use only if VMA is not part of the VMA tree or has no other users and
+ * therefore needs no locking.
+ */
+static inline void __vm_flags_mod(struct vm_area_struct *vma,
+ vm_flags_t set, vm_flags_t clear)
+{
+ vm_flags_init(vma, (vma->vm_flags | set) & ~clear);
+}
+
+/*
+ * Use only when the order of set/clear operations is unimportant, otherwise
+ * use vm_flags_{set|clear} explicitly.
+ */
+static inline void vm_flags_mod(struct vm_area_struct *vma,
+ vm_flags_t set, vm_flags_t clear)
+{
+ mmap_assert_write_locked(vma->vm_mm);
+ __vm_flags_mod(vma, set, clear);
+}
+
static inline void vma_set_anonymous(struct vm_area_struct *vma)
{
vma->vm_ops = NULL;
@@ -671,16 +733,16 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
static inline
struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
{
- return mas_find(&vmi->mas, max);
+ return mas_find(&vmi->mas, max - 1);
}
static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
{
/*
- * Uses vma_find() to get the first VMA when the iterator starts.
+ * Uses mas_find() to get the first VMA when the iterator starts.
* Calling mas_next() could skip the first entry.
*/
- return vma_find(vmi, ULONG_MAX);
+ return mas_find(&vmi->mas, ULONG_MAX);
}
static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
@@ -693,12 +755,50 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
return vmi->mas.index;
}
+static inline unsigned long vma_iter_end(struct vma_iterator *vmi)
+{
+ return vmi->mas.last + 1;
+}
+static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi,
+ unsigned long count)
+{
+ return mas_expected_entries(&vmi->mas, count);
+}
+
+/* Free any unused preallocations */
+static inline void vma_iter_free(struct vma_iterator *vmi)
+{
+ mas_destroy(&vmi->mas);
+}
+
+static inline int vma_iter_bulk_store(struct vma_iterator *vmi,
+ struct vm_area_struct *vma)
+{
+ vmi->mas.index = vma->vm_start;
+ vmi->mas.last = vma->vm_end - 1;
+ mas_store(&vmi->mas, vma);
+ if (unlikely(mas_is_err(&vmi->mas)))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static inline void vma_iter_invalidate(struct vma_iterator *vmi)
+{
+ mas_pause(&vmi->mas);
+}
+
+static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr)
+{
+ mas_set(&vmi->mas, addr);
+}
+
#define for_each_vma(__vmi, __vma) \
while (((__vma) = vma_next(&(__vmi))) != NULL)
/* The MM code likes to work with exclusive end addresses */
#define for_each_vma_range(__vmi, __vma, __end) \
- while (((__vma) = vma_find(&(__vmi), (__end) - 1)) != NULL)
+ while (((__vma) = vma_find(&(__vmi), (__end))) != NULL)
#ifdef CONFIG_SHMEM
/*
@@ -720,11 +820,20 @@ int vma_is_stack_for_current(struct vm_area_struct *vma);
struct mmu_gather;
struct inode;
+/*
+ * compound_order() can be called without holding a reference, which means
+ * that niceties like page_folio() don't work. These callers should be
+ * prepared to handle wild return values. For example, PG_head may be
+ * set before _folio_order is initialised, or this may be a tail page.
+ * See compaction.c for some good examples.
+ */
static inline unsigned int compound_order(struct page *page)
{
- if (!PageHead(page))
+ struct folio *folio = (struct folio *)page;
+
+ if (!test_bit(PG_head, &folio->flags))
return 0;
- return page[1].compound_order;
+ return folio->_folio_order;
}
/**
@@ -783,6 +892,13 @@ static inline bool get_page_unless_zero(struct page *page)
return page_ref_add_unless(page, 1, 0);
}
+static inline struct folio *folio_get_nontail_page(struct page *page)
+{
+ if (unlikely(!get_page_unless_zero(page)))
+ return NULL;
+ return (struct folio *)page;
+}
+
extern int page_is_ram(unsigned long pfn);
enum {
@@ -832,34 +948,7 @@ static inline int is_vmalloc_or_module_addr(const void *x)
static inline int folio_entire_mapcount(struct folio *folio)
{
VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
- return atomic_read(folio_mapcount_ptr(folio)) + 1;
-}
-
-/*
- * Mapcount of compound page as a whole, does not include mapped sub-pages.
- * Must be called only on head of compound page.
- */
-static inline int head_compound_mapcount(struct page *head)
-{
- return atomic_read(compound_mapcount_ptr(head)) + 1;
-}
-
-/*
- * If a 16GB hugetlb page were mapped by PTEs of all of its 4kB sub-pages,
- * its subpages_mapcount would be 0x400000: choose the COMPOUND_MAPPED bit
- * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE). Hugetlb currently
- * leaves subpages_mapcount at 0, but avoid surprise if it participates later.
- */
-#define COMPOUND_MAPPED 0x800000
-#define SUBPAGES_MAPPED (COMPOUND_MAPPED - 1)
-
-/*
- * Number of sub-pages mapped by PTE, does not include compound mapcount.
- * Must be called only on head of compound page.
- */
-static inline int head_subpages_mapcount(struct page *head)
-{
- return atomic_read(subpages_mapcount_ptr(head)) & SUBPAGES_MAPPED;
+ return atomic_read(&folio->_entire_mapcount) + 1;
}
/*
@@ -872,25 +961,29 @@ static inline void page_mapcount_reset(struct page *page)
atomic_set(&(page)->_mapcount, -1);
}
-/*
- * Mapcount of 0-order page; when compound sub-page, includes
- * compound_mapcount of compound_head of page.
+/**
+ * page_mapcount() - Number of times this precise page is mapped.
+ * @page: The page.
+ *
+ * The number of times this page is mapped. If this page is part of
+ * a large folio, it includes the number of times this page is mapped
+ * as part of that folio.
*
- * Result is undefined for pages which cannot be mapped into userspace.
+ * The result is undefined for pages which cannot be mapped into userspace.
* For example SLAB or special types of pages. See function page_has_type().
- * They use this place in struct page differently.
+ * They use this field in struct page differently.
*/
static inline int page_mapcount(struct page *page)
{
int mapcount = atomic_read(&page->_mapcount) + 1;
- if (likely(!PageCompound(page)))
- return mapcount;
- page = compound_head(page);
- return head_compound_mapcount(page) + mapcount;
+ if (unlikely(PageCompound(page)))
+ mapcount += folio_entire_mapcount(page_folio(page));
+
+ return mapcount;
}
-int total_compound_mapcount(struct page *head);
+int folio_total_mapcount(struct folio *folio);
/**
* folio_mapcount() - Calculate the number of mappings of this folio.
@@ -907,24 +1000,24 @@ static inline int folio_mapcount(struct folio *folio)
{
if (likely(!folio_test_large(folio)))
return atomic_read(&folio->_mapcount) + 1;
- return total_compound_mapcount(&folio->page);
+ return folio_total_mapcount(folio);
}
static inline int total_mapcount(struct page *page)
{
if (likely(!PageCompound(page)))
return atomic_read(&page->_mapcount) + 1;
- return total_compound_mapcount(compound_head(page));
+ return folio_total_mapcount(page_folio(page));
}
static inline bool folio_large_is_mapped(struct folio *folio)
{
/*
- * Reading folio_mapcount_ptr() below could be omitted if hugetlb
- * participated in incrementing subpages_mapcount when compound mapped.
+ * Reading _entire_mapcount below could be omitted if hugetlb
+ * participated in incrementing nr_pages_mapped when compound mapped.
*/
- return atomic_read(folio_subpages_mapcount_ptr(folio)) > 0 ||
- atomic_read(folio_mapcount_ptr(folio)) >= 0;
+ return atomic_read(&folio->_nr_pages_mapped) > 0 ||
+ atomic_read(&folio->_entire_mapcount) >= 0;
}
/**
@@ -999,8 +1092,11 @@ extern compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS];
static inline void set_compound_page_dtor(struct page *page,
enum compound_dtor_id compound_dtor)
{
+ struct folio *folio = (struct folio *)page;
+
VM_BUG_ON_PAGE(compound_dtor >= NR_COMPOUND_DTORS, page);
- page[1].compound_dtor = compound_dtor;
+ VM_BUG_ON_PAGE(!PageHead(page), page);
+ folio->_folio_dtor = compound_dtor;
}
static inline void folio_set_compound_dtor(struct folio *folio,
@@ -1012,44 +1108,13 @@ static inline void folio_set_compound_dtor(struct folio *folio,
void destroy_large_folio(struct folio *folio);
-static inline int head_compound_pincount(struct page *head)
-{
- return atomic_read(compound_pincount_ptr(head));
-}
-
static inline void set_compound_order(struct page *page, unsigned int order)
{
- page[1].compound_order = order;
-#ifdef CONFIG_64BIT
- page[1].compound_nr = 1U << order;
-#endif
-}
-
-/*
- * folio_set_compound_order is generally passed a non-zero order to
- * initialize a large folio. However, hugetlb code abuses this by
- * passing in zero when 'dissolving' a large folio.
- */
-static inline void folio_set_compound_order(struct folio *folio,
- unsigned int order)
-{
- VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
+ struct folio *folio = (struct folio *)page;
folio->_folio_order = order;
#ifdef CONFIG_64BIT
- folio->_folio_nr_pages = order ? 1U << order : 0;
-#endif
-}
-
-/* Returns the number of pages in this potentially compound page. */
-static inline unsigned long compound_nr(struct page *page)
-{
- if (!PageHead(page))
- return 1;
-#ifdef CONFIG_64BIT
- return page[1].compound_nr;
-#else
- return 1UL << compound_order(page);
+ folio->_folio_nr_pages = 1U << order;
#endif
}
@@ -1076,16 +1141,6 @@ static inline unsigned int thp_order(struct page *page)
}
/**
- * thp_nr_pages - The number of regular pages in this huge page.
- * @page: The head page of a huge page.
- */
-static inline int thp_nr_pages(struct page *page)
-{
- VM_BUG_ON_PGFLAGS(PageTail(page), page);
- return compound_nr(page);
-}
-
-/**
* thp_size - Size of a transparent huge page.
* @page: Head page of a transparent huge page.
*
@@ -1226,8 +1281,6 @@ static inline void get_page(struct page *page)
folio_get(page_folio(page));
}
-int __must_check try_grab_page(struct page *page, unsigned int flags);
-
static inline __must_check bool try_get_page(struct page *page)
{
page = compound_head(page);
@@ -1369,6 +1422,21 @@ static inline bool is_cow_mapping(vm_flags_t flags)
return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
}
+#ifndef CONFIG_MMU
+static inline bool is_nommu_shared_mapping(vm_flags_t flags)
+{
+ /*
+ * NOMMU shared mappings are ordinary MAP_SHARED mappings and selected
+ * R/O MAP_PRIVATE file mappings that are an effective R/O overlay of
+ * a file mapping. R/O MAP_PRIVATE mappings might still modify
+ * underlying memory if ptrace is active, so this is only possible if
+ * ptrace does not apply. Note that there is no mprotect() to upgrade
+ * write permissions later.
+ */
+ return flags & (VM_MAYSHARE | VM_MAYOVERLAY);
+}
+#endif
+
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define SECTION_IN_PAGE_FLAGS
#endif
@@ -1643,11 +1711,6 @@ static inline struct folio *pfn_folio(unsigned long pfn)
return page_folio(pfn_to_page(pfn));
}
-static inline atomic_t *folio_pincount_ptr(struct folio *folio)
-{
- return &folio_page(folio, 1)->compound_pincount;
-}
-
/**
* folio_maybe_dma_pinned - Report if a folio may be pinned for DMA.
* @folio: The folio.
@@ -1665,7 +1728,7 @@ static inline atomic_t *folio_pincount_ptr(struct folio *folio)
* expected to be able to deal gracefully with a false positive.
*
* For large folios, the result will be exactly correct. That's because
- * we have more tracking data available: the compound_pincount is used
+ * we have more tracking data available: the _pincount field is used
* instead of the GUP_PIN_COUNTING_BIAS scheme.
*
* For more information, please see Documentation/core-api/pin_user_pages.rst.
@@ -1676,7 +1739,7 @@ static inline atomic_t *folio_pincount_ptr(struct folio *folio)
static inline bool folio_maybe_dma_pinned(struct folio *folio)
{
if (folio_test_large(folio))
- return atomic_read(folio_pincount_ptr(folio)) > 0;
+ return atomic_read(&folio->_pincount) > 0;
/*
* folio_ref_count() is signed. If that refcount overflows, then
@@ -1784,6 +1847,33 @@ static inline long folio_nr_pages(struct folio *folio)
#endif
}
+/*
+ * compound_nr() returns the number of pages in this potentially compound
+ * page. compound_nr() can be called on a tail page, and is defined to
+ * return 1 in that case.
+ */
+static inline unsigned long compound_nr(struct page *page)
+{
+ struct folio *folio = (struct folio *)page;
+
+ if (!test_bit(PG_head, &folio->flags))
+ return 1;
+#ifdef CONFIG_64BIT
+ return folio->_folio_nr_pages;
+#else
+ return 1L << folio->_folio_order;
+#endif
+}
+
+/**
+ * thp_nr_pages - The number of regular pages in this huge page.
+ * @page: The head page of a huge page.
+ */
+static inline int thp_nr_pages(struct page *page)
+{
+ return folio_nr_pages((struct folio *)page);
+}
+
/**
* folio_next - Move to the next physical folio.
* @folio: The folio we're currently operating on.
@@ -1833,6 +1923,24 @@ static inline size_t folio_size(struct folio *folio)
return PAGE_SIZE << folio_order(folio);
}
+/**
+ * folio_estimated_sharers - Estimate the number of sharers of a folio.
+ * @folio: The folio.
+ *
+ * folio_estimated_sharers() aims to serve as a function to efficiently
+ * estimate the number of processes sharing a folio. This is done by
+ * looking at the precise mapcount of the first subpage in the folio, and
+ * assuming the other subpages are the same. This may not be true for large
+ * folios. If you want exact mapcounts for exact calculations, look at
+ * page_mapcount() or folio_total_mapcount().
+ *
+ * Return: The estimated number of processes sharing a folio.
+ */
+static inline int folio_estimated_sharers(struct folio *folio)
+{
+ return page_mapcount(folio_page(folio, 0));
+}
+
#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
static inline int arch_make_page_accessible(struct page *page)
{
@@ -1929,6 +2037,21 @@ static inline bool page_is_pfmemalloc(const struct page *page)
}
/*
+ * Return true only if the folio has been allocated with
+ * ALLOC_NO_WATERMARKS and the low watermark was not
+ * met implying that the system is under some pressure.
+ */
+static inline bool folio_is_pfmemalloc(const struct folio *folio)
+{
+ /*
+ * lru.next has bit 1 set if the page is allocated from the
+ * pfmemalloc reserves. Callers may simply overwrite it if
+ * they do not need to preserve that information.
+ */
+ return (uintptr_t)folio->lru.next & BIT(1);
+}
+
+/*
* Only to be called by the page allocator on a freshly allocated
* page.
*/
@@ -2015,6 +2138,8 @@ static inline bool can_do_mlock(void) { return false; }
extern int user_shm_lock(size_t, struct ucounts *);
extern void user_shm_unlock(size_t, struct ucounts *);
+struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr,
+ pte_t pte);
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
pte_t pte);
struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
@@ -2022,13 +2147,16 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
-void zap_page_range(struct vm_area_struct *vma, unsigned long address,
- unsigned long size);
void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *details);
+static inline void zap_vma_pages(struct vm_area_struct *vma)
+{
+ zap_page_range_single(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start, NULL);
+}
void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
struct vm_area_struct *start_vma, unsigned long start,
- unsigned long end);
+ unsigned long end, bool mm_wr_locked);
struct mmu_notifier_range;
@@ -2175,21 +2303,18 @@ static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma
}
bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
pte_t pte);
-extern unsigned long change_protection(struct mmu_gather *tlb,
+extern long change_protection(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start,
- unsigned long end, pgprot_t newprot,
- unsigned long cp_flags);
-extern int mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma,
- struct vm_area_struct **pprev, unsigned long start,
- unsigned long end, unsigned long newflags);
+ unsigned long end, unsigned long cp_flags);
+extern int mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
+ struct vm_area_struct *vma, struct vm_area_struct **pprev,
+ unsigned long start, unsigned long end, unsigned long newflags);
/*
* doesn't attempt to fault and will return short.
*/
int get_user_pages_fast_only(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages);
-int pin_user_pages_fast_only(unsigned long start, int nr_pages,
- unsigned int gup_flags, struct page **pages);
static inline bool get_user_page_fast_only(unsigned long addr,
unsigned int gup_flags, struct page **pagep)
@@ -2813,23 +2938,21 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node);
/* mmap.c */
extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
-extern int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
- struct vm_area_struct *expand);
-static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
-{
- return __vma_adjust(vma, start, end, pgoff, insert, NULL);
-}
-extern struct vm_area_struct *vma_merge(struct mm_struct *,
- struct vm_area_struct *prev, unsigned long addr, unsigned long end,
- unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
- struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *);
+extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, pgoff_t pgoff,
+ struct vm_area_struct *next);
+extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, pgoff_t pgoff);
+extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi,
+ struct mm_struct *, struct vm_area_struct *prev, unsigned long addr,
+ unsigned long end, unsigned long vm_flags, struct anon_vma *,
+ struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx,
+ struct anon_vma_name *);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
-extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
- unsigned long addr, int new_below);
-extern int split_vma(struct mm_struct *, struct vm_area_struct *,
- unsigned long addr, int new_below);
+extern int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *,
+ unsigned long addr, int new_below);
+extern int split_vma(struct vma_iterator *vmi, struct vm_area_struct *,
+ unsigned long addr, int new_below);
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void unlink_file_vma(struct vm_area_struct *);
extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
@@ -2837,9 +2960,6 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
bool *need_rmap_locks);
extern void exit_mmap(struct mm_struct *);
-void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas);
-void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas);
-
static inline int check_data_rlimit(unsigned long rlim,
unsigned long new,
unsigned long start,
@@ -2887,7 +3007,7 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr,
extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
unsigned long pgoff, unsigned long *populate, struct list_head *uf);
-extern int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm,
+extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
unsigned long start, size_t len, struct list_head *uf,
bool downgrade);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
@@ -2895,6 +3015,9 @@ extern int do_munmap(struct mm_struct *, unsigned long, size_t,
extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior);
#ifdef CONFIG_MMU
+extern int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct list_head *uf, bool downgrade);
extern int __mm_populate(unsigned long addr, unsigned long len,
int ignore_errors);
static inline void mm_populate(unsigned long addr, unsigned long len)
@@ -3100,81 +3223,6 @@ static inline vm_fault_t vmf_error(int err)
struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
unsigned int foll_flags);
-#define FOLL_WRITE 0x01 /* check pte is writable */
-#define FOLL_TOUCH 0x02 /* mark page accessed */
-#define FOLL_GET 0x04 /* do get_page on page */
-#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */
-#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */
-#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO
- * and return without waiting upon it */
-#define FOLL_NOFAULT 0x80 /* do not fault in pages */
-#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
-#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
-#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
-#define FOLL_ANON 0x8000 /* don't do file mappings */
-#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */
-#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
-#define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */
-#define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */
-#define FOLL_PCI_P2PDMA 0x100000 /* allow returning PCI P2PDMA pages */
-#define FOLL_INTERRUPTIBLE 0x200000 /* allow interrupts from generic signals */
-
-/*
- * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
- * other. Here is what they mean, and how to use them:
- *
- * FOLL_LONGTERM indicates that the page will be held for an indefinite time
- * period _often_ under userspace control. This is in contrast to
- * iov_iter_get_pages(), whose usages are transient.
- *
- * FIXME: For pages which are part of a filesystem, mappings are subject to the
- * lifetime enforced by the filesystem and we need guarantees that longterm
- * users like RDMA and V4L2 only establish mappings which coordinate usage with
- * the filesystem. Ideas for this coordination include revoking the longterm
- * pin, delaying writeback, bounce buffer page writeback, etc. As FS DAX was
- * added after the problem with filesystems was found FS DAX VMAs are
- * specifically failed. Filesystem pages are still subject to bugs and use of
- * FOLL_LONGTERM should be avoided on those pages.
- *
- * FIXME: Also NOTE that FOLL_LONGTERM is not supported in every GUP call.
- * Currently only get_user_pages() and get_user_pages_fast() support this flag
- * and calls to get_user_pages_[un]locked are specifically not allowed. This
- * is due to an incompatibility with the FS DAX check and
- * FAULT_FLAG_ALLOW_RETRY.
- *
- * In the CMA case: long term pins in a CMA region would unnecessarily fragment
- * that region. And so, CMA attempts to migrate the page before pinning, when
- * FOLL_LONGTERM is specified.
- *
- * FOLL_PIN indicates that a special kind of tracking (not just page->_refcount,
- * but an additional pin counting system) will be invoked. This is intended for
- * anything that gets a page reference and then touches page data (for example,
- * Direct IO). This lets the filesystem know that some non-file-system entity is
- * potentially changing the pages' data. In contrast to FOLL_GET (whose pages
- * are released via put_page()), FOLL_PIN pages must be released, ultimately, by
- * a call to unpin_user_page().
- *
- * FOLL_PIN is similar to FOLL_GET: both of these pin pages. They use different
- * and separate refcounting mechanisms, however, and that means that each has
- * its own acquire and release mechanisms:
- *
- * FOLL_GET: get_user_pages*() to acquire, and put_page() to release.
- *
- * FOLL_PIN: pin_user_pages*() to acquire, and unpin_user_pages to release.
- *
- * FOLL_PIN and FOLL_GET are mutually exclusive for a given function call.
- * (The underlying pages may experience both FOLL_GET-based and FOLL_PIN-based
- * calls applied to them, and that's perfectly OK. This is a constraint on the
- * callers, not on the pages.)
- *
- * FOLL_PIN should be set internally by the pin_user_pages*() APIs, never
- * directly by the caller. That's in order to help avoid mismatches when
- * releasing pages: get_user_pages*() pages must be released via put_page(),
- * while pin_user_pages*() pages must be released via unpin_user_page().
- *
- * Please see Documentation/core-api/pin_user_pages.rst for more information.
- */
-
static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
{
if (vm_fault & VM_FAULT_OOM)
@@ -3187,71 +3235,6 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
}
/*
- * Indicates for which pages that are write-protected in the page table,
- * whether GUP has to trigger unsharing via FAULT_FLAG_UNSHARE such that the
- * GUP pin will remain consistent with the pages mapped into the page tables
- * of the MM.
- *
- * Temporary unmapping of PageAnonExclusive() pages or clearing of
- * PageAnonExclusive() has to protect against concurrent GUP:
- * * Ordinary GUP: Using the PT lock
- * * GUP-fast and fork(): mm->write_protect_seq
- * * GUP-fast and KSM or temporary unmapping (swap, migration): see
- * page_try_share_anon_rmap()
- *
- * Must be called with the (sub)page that's actually referenced via the
- * page table entry, which might not necessarily be the head page for a
- * PTE-mapped THP.
- *
- * If the vma is NULL, we're coming from the GUP-fast path and might have
- * to fallback to the slow path just to lookup the vma.
- */
-static inline bool gup_must_unshare(struct vm_area_struct *vma,
- unsigned int flags, struct page *page)
-{
- /*
- * FOLL_WRITE is implicitly handled correctly as the page table entry
- * has to be writable -- and if it references (part of) an anonymous
- * folio, that part is required to be marked exclusive.
- */
- if ((flags & (FOLL_WRITE | FOLL_PIN)) != FOLL_PIN)
- return false;
- /*
- * Note: PageAnon(page) is stable until the page is actually getting
- * freed.
- */
- if (!PageAnon(page)) {
- /*
- * We only care about R/O long-term pining: R/O short-term
- * pinning does not have the semantics to observe successive
- * changes through the process page tables.
- */
- if (!(flags & FOLL_LONGTERM))
- return false;
-
- /* We really need the vma ... */
- if (!vma)
- return true;
-
- /*
- * ... because we only care about writable private ("COW")
- * mappings where we have to break COW early.
- */
- return is_cow_mapping(vma->vm_flags);
- }
-
- /* Paired with a memory barrier in page_try_share_anon_rmap(). */
- if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
- smp_rmb();
-
- /*
- * Note that PageKsm() pages cannot be exclusive, and consequently,
- * cannot get pinned.
- */
- return !PageAnonExclusive(page);
-}
-
-/*
* Indicates whether GUP can follow a PROT_NONE mapped page, or whether
* a (NUMA hinting) fault is required.
*/
@@ -3550,6 +3533,11 @@ enum mf_action_page_type {
MF_MSG_UNKNOWN,
};
+/*
+ * Sysfs entries for memory failure handling statistics.
+ */
+extern const struct attribute_group memory_failure_attr_group;
+
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
extern void clear_huge_page(struct page *page,
unsigned long addr_hint,
@@ -3667,7 +3655,7 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
* VM_MAYWRITE as we still want them to be COW-writable.
*/
if (vma->vm_flags & VM_SHARED)
- vma->vm_flags &= ~(VM_MAYWRITE);
+ vm_flags_clear(vma, VM_MAYWRITE);
}
return 0;