summaryrefslogtreecommitdiff
path: root/include/linux/mm.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/mm.h')
-rw-r--r--include/linux/mm.h225
1 files changed, 143 insertions, 82 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7b0ee64225de..9849dfda44d4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -5,6 +5,7 @@
#include <linux/errno.h>
#include <linux/mmdebug.h>
#include <linux/gfp.h>
+#include <linux/pgalloc_tag.h>
#include <linux/bug.h>
#include <linux/list.h>
#include <linux/mmzone.h>
@@ -1199,7 +1200,7 @@ static inline int is_vmalloc_or_module_addr(const void *x)
* debugging purposes - it does not include PTE-mapped sub-pages; look
* at folio_mapcount() or page_mapcount() instead.
*/
-static inline int folio_entire_mapcount(struct folio *folio)
+static inline int folio_entire_mapcount(const struct folio *folio)
{
VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
return atomic_read(&folio->_entire_mapcount) + 1;
@@ -1223,48 +1224,60 @@ static inline void page_mapcount_reset(struct page *page)
* a large folio, it includes the number of times this page is mapped
* as part of that folio.
*
- * The result is undefined for pages which cannot be mapped into userspace.
- * For example SLAB or special types of pages. See function page_has_type().
- * They use this field in struct page differently.
+ * Will report 0 for pages which cannot be mapped into userspace, eg
+ * slab, page tables and similar.
*/
static inline int page_mapcount(struct page *page)
{
int mapcount = atomic_read(&page->_mapcount) + 1;
+ /* Handle page_has_type() pages */
+ if (mapcount < PAGE_MAPCOUNT_RESERVE + 1)
+ mapcount = 0;
if (unlikely(PageCompound(page)))
mapcount += folio_entire_mapcount(page_folio(page));
return mapcount;
}
-int folio_total_mapcount(struct folio *folio);
+static inline int folio_large_mapcount(const struct folio *folio)
+{
+ VM_WARN_ON_FOLIO(!folio_test_large(folio), folio);
+ return atomic_read(&folio->_large_mapcount) + 1;
+}
/**
- * folio_mapcount() - Calculate the number of mappings of this folio.
+ * folio_mapcount() - Number of mappings of this folio.
* @folio: The folio.
*
- * A large folio tracks both how many times the entire folio is mapped,
- * and how many times each individual page in the folio is mapped.
- * This function calculates the total number of times the folio is
- * mapped.
+ * The folio mapcount corresponds to the number of present user page table
+ * entries that reference any part of a folio. Each such present user page
+ * table entry must be paired with exactly on folio reference.
+ *
+ * For ordindary folios, each user page table entry (PTE/PMD/PUD/...) counts
+ * exactly once.
+ *
+ * For hugetlb folios, each abstracted "hugetlb" user page table entry that
+ * references the entire folio counts exactly once, even when such special
+ * page table entries are comprised of multiple ordinary page table entries.
+ *
+ * Will report 0 for pages which cannot be mapped into userspace, such as
+ * slab, page tables and similar.
*
* Return: The number of times this folio is mapped.
*/
-static inline int folio_mapcount(struct folio *folio)
+static inline int folio_mapcount(const struct folio *folio)
{
- if (likely(!folio_test_large(folio)))
- return atomic_read(&folio->_mapcount) + 1;
- return folio_total_mapcount(folio);
-}
+ int mapcount;
-static inline bool folio_large_is_mapped(struct folio *folio)
-{
- /*
- * Reading _entire_mapcount below could be omitted if hugetlb
- * participated in incrementing nr_pages_mapped when compound mapped.
- */
- return atomic_read(&folio->_nr_pages_mapped) > 0 ||
- atomic_read(&folio->_entire_mapcount) >= 0;
+ if (likely(!folio_test_large(folio))) {
+ mapcount = atomic_read(&folio->_mapcount) + 1;
+ /* Handle page_has_type() pages */
+ if (mapcount < PAGE_MAPCOUNT_RESERVE + 1)
+ mapcount = 0;
+ return mapcount;
+ }
+ return folio_large_mapcount(folio);
}
/**
@@ -1273,11 +1286,9 @@ static inline bool folio_large_is_mapped(struct folio *folio)
*
* Return: True if any page in this folio is referenced by user page tables.
*/
-static inline bool folio_mapped(struct folio *folio)
+static inline bool folio_mapped(const struct folio *folio)
{
- if (likely(!folio_test_large(folio)))
- return atomic_read(&folio->_mapcount) >= 0;
- return folio_large_is_mapped(folio);
+ return folio_mapcount(folio) >= 1;
}
/*
@@ -1285,11 +1296,9 @@ static inline bool folio_mapped(struct folio *folio)
* For compound page it returns true if any sub-page of compound page is mapped,
* even if this particular sub-page is not itself mapped by any PTE or PMD.
*/
-static inline bool page_mapped(struct page *page)
+static inline bool page_mapped(const struct page *page)
{
- if (likely(!PageCompound(page)))
- return atomic_read(&page->_mapcount) >= 0;
- return folio_large_is_mapped(page_folio(page));
+ return folio_mapped(page_folio(page));
}
static inline struct page *virt_to_head_page(const void *x)
@@ -1315,8 +1324,6 @@ void folio_copy(struct folio *dst, struct folio *src);
unsigned long nr_free_buffer_pages(void);
-void destroy_large_folio(struct folio *folio);
-
/* Returns the number of bytes in this potentially compound page. */
static inline unsigned long page_size(struct page *page)
{
@@ -1434,27 +1441,22 @@ vm_fault_t finish_fault(struct vm_fault *vmf);
#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
-bool __put_devmap_managed_page_refs(struct page *page, int refs);
-static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
+bool __put_devmap_managed_folio_refs(struct folio *folio, int refs);
+static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs)
{
if (!static_branch_unlikely(&devmap_managed_key))
return false;
- if (!is_zone_device_page(page))
+ if (!folio_is_zone_device(folio))
return false;
- return __put_devmap_managed_page_refs(page, refs);
+ return __put_devmap_managed_folio_refs(folio, refs);
}
#else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
-static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
+static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs)
{
return false;
}
#endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
-static inline bool put_devmap_managed_page(struct page *page)
-{
- return put_devmap_managed_page_refs(page, 1);
-}
-
/* 127: arbitrary random number, small enough to assemble well */
#define folio_ref_zero_or_close_to_overflow(folio) \
((unsigned int) folio_ref_count(folio) + 127u <= 127u)
@@ -1573,7 +1575,7 @@ static inline void put_page(struct page *page)
* For some devmap managed pages we need to catch refcount transition
* from 2 to 1:
*/
- if (put_devmap_managed_page(&folio->page))
+ if (put_devmap_managed_folio_refs(folio, 1))
return;
folio_put(folio);
}
@@ -2067,7 +2069,7 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
*
* Return: A positive power of two.
*/
-static inline long folio_nr_pages(struct folio *folio)
+static inline long folio_nr_pages(const struct folio *folio)
{
if (!folio_test_large(folio))
return 1;
@@ -2162,21 +2164,64 @@ static inline size_t folio_size(struct folio *folio)
}
/**
- * folio_estimated_sharers - Estimate the number of sharers of a folio.
+ * folio_likely_mapped_shared - Estimate if the folio is mapped into the page
+ * tables of more than one MM
* @folio: The folio.
*
- * folio_estimated_sharers() aims to serve as a function to efficiently
- * estimate the number of processes sharing a folio. This is done by
- * looking at the precise mapcount of the first subpage in the folio, and
- * assuming the other subpages are the same. This may not be true for large
- * folios. If you want exact mapcounts for exact calculations, look at
- * page_mapcount() or folio_total_mapcount().
+ * This function checks if the folio is currently mapped into more than one
+ * MM ("mapped shared"), or if the folio is only mapped into a single MM
+ * ("mapped exclusively").
+ *
+ * As precise information is not easily available for all folios, this function
+ * estimates the number of MMs ("sharers") that are currently mapping a folio
+ * using the number of times the first page of the folio is currently mapped
+ * into page tables.
+ *
+ * For small anonymous folios (except KSM folios) and anonymous hugetlb folios,
+ * the return value will be exactly correct, because they can only be mapped
+ * at most once into an MM, and they cannot be partially mapped.
+ *
+ * For other folios, the result can be fuzzy:
+ * #. For partially-mappable large folios (THP), the return value can wrongly
+ * indicate "mapped exclusively" (false negative) when the folio is
+ * only partially mapped into at least one MM.
+ * #. For pagecache folios (including hugetlb), the return value can wrongly
+ * indicate "mapped shared" (false positive) when two VMAs in the same MM
+ * cover the same file range.
+ * #. For (small) KSM folios, the return value can wrongly indicate "mapped
+ * shared" (false positive), when the folio is mapped multiple times into
+ * the same MM.
+ *
+ * Further, this function only considers current page table mappings that
+ * are tracked using the folio mapcount(s).
*
- * Return: The estimated number of processes sharing a folio.
+ * This function does not consider:
+ * #. If the folio might get mapped in the (near) future (e.g., swapcache,
+ * pagecache, temporary unmapping for migration).
+ * #. If the folio is mapped differently (VM_PFNMAP).
+ * #. If hugetlb page table sharing applies. Callers might want to check
+ * hugetlb_pmd_shared().
+ *
+ * Return: Whether the folio is estimated to be mapped into more than one MM.
*/
-static inline int folio_estimated_sharers(struct folio *folio)
+static inline bool folio_likely_mapped_shared(struct folio *folio)
{
- return page_mapcount(folio_page(folio, 0));
+ int mapcount = folio_mapcount(folio);
+
+ /* Only partially-mappable folios require more care. */
+ if (!folio_test_large(folio) || unlikely(folio_test_hugetlb(folio)))
+ return mapcount > 1;
+
+ /* A single mapping implies "mapped exclusively". */
+ if (mapcount <= 1)
+ return false;
+
+ /* If any page is mapped more than once we treat it "mapped shared". */
+ if (folio_entire_mapcount(folio) || mapcount > folio_nr_pages(folio))
+ return true;
+
+ /* Let's guess based on the first subpage. */
+ return atomic_read(&folio->_mapcount) > 0;
}
#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
@@ -2391,12 +2436,8 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor, unsigned long ceiling);
int
copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
-int follow_pte(struct mm_struct *mm, unsigned long address,
+int follow_pte(struct vm_area_struct *vma, unsigned long address,
pte_t **ptepp, spinlock_t **ptlp);
-int follow_pfn(struct vm_area_struct *vma, unsigned long address,
- unsigned long *pfn);
-int follow_phys(struct vm_area_struct *vma, unsigned long address,
- unsigned int flags, unsigned long *prot, resource_size_t *phys);
int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
void *buf, int len, int write);
@@ -2550,7 +2591,7 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
MM_CP_UFFD_WP_RESOLVE)
bool vma_needs_dirty_tracking(struct vm_area_struct *vma);
-int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
+bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
{
/*
@@ -2857,12 +2898,13 @@ static inline bool pagetable_is_reserved(struct ptdesc *pt)
*
* Return: The ptdesc describing the allocated page tables.
*/
-static inline struct ptdesc *pagetable_alloc(gfp_t gfp, unsigned int order)
+static inline struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order)
{
- struct page *page = alloc_pages(gfp | __GFP_COMP, order);
+ struct page *page = alloc_pages_noprof(gfp | __GFP_COMP, order);
return page_ptdesc(page);
}
+#define pagetable_alloc(...) alloc_hooks(pagetable_alloc_noprof(__VA_ARGS__))
/**
* pagetable_free - Free pagetables
@@ -3132,6 +3174,14 @@ extern void reserve_bootmem_region(phys_addr_t start,
/* Free the reserved page into the buddy system, so it gets managed. */
static inline void free_reserved_page(struct page *page)
{
+ if (mem_alloc_profiling_enabled()) {
+ union codetag_ref *ref = get_page_tag_ref(page);
+
+ if (ref) {
+ set_codetag_empty(ref);
+ put_page_tag_ref(ref);
+ }
+ }
ClearPageReserved(page);
init_page_count(page);
__free_page(page);
@@ -3193,8 +3243,6 @@ static inline unsigned long get_num_physpages(void)
*/
void free_area_init(unsigned long *max_zone_pfn);
unsigned long node_map_pfn_alignment(void);
-unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
- unsigned long end_pfn);
extern unsigned long absent_pages_in_range(unsigned long start_pfn,
unsigned long end_pfn);
extern void get_pfn_range_for_nid(unsigned int nid,
@@ -3210,7 +3258,6 @@ static inline int early_pfn_to_nid(unsigned long pfn)
extern int __meminit early_pfn_to_nid(unsigned long pfn);
#endif
-extern void set_dma_reserve(unsigned long new_dma_reserve);
extern void mem_init(void);
extern void __init mmap_init(void);
@@ -3222,9 +3269,6 @@ static inline void show_mem(void)
extern long si_mem_available(void);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
-#ifdef __HAVE_ARCH_RESERVED_KERNEL_PAGES
-extern unsigned long arch_reserved_kernel_pages(void);
-#endif
extern __printf(3, 4)
void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...);
@@ -3383,7 +3427,16 @@ extern int install_special_mapping(struct mm_struct *mm,
unsigned long randomize_stack_top(unsigned long stack_top);
unsigned long randomize_page(unsigned long start, unsigned long range);
-extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+unsigned long
+__get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags);
+
+static inline unsigned long
+get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ return __get_unmapped_area(file, addr, len, pgoff, flags, 0);
+}
extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
@@ -3429,6 +3482,7 @@ struct vm_unmapped_area_info {
unsigned long high_limit;
unsigned long align_mask;
unsigned long align_offset;
+ unsigned long start_gap;
};
extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info);
@@ -3722,7 +3776,14 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
static inline bool want_init_on_free(void)
{
return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
- &init_on_free);
+ &init_on_free);
+}
+
+DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free);
+static inline bool want_init_mlocked_on_free(void)
+{
+ return static_branch_maybe(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON,
+ &init_mlocked_on_free);
}
extern bool _debug_pagealloc_enabled_early;
@@ -3785,24 +3846,22 @@ static inline bool page_is_guard(struct page *page)
return PageGuard(page);
}
-bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
- int migratetype);
+bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order);
static inline bool set_page_guard(struct zone *zone, struct page *page,
- unsigned int order, int migratetype)
+ unsigned int order)
{
if (!debug_guardpage_enabled())
return false;
- return __set_page_guard(zone, page, order, migratetype);
+ return __set_page_guard(zone, page, order);
}
-void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order,
- int migratetype);
+void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order);
static inline void clear_page_guard(struct zone *zone, struct page *page,
- unsigned int order, int migratetype)
+ unsigned int order)
{
if (!debug_guardpage_enabled())
return;
- __clear_page_guard(zone, page, order, migratetype);
+ __clear_page_guard(zone, page, order);
}
#else /* CONFIG_DEBUG_PAGEALLOC */
@@ -3812,9 +3871,9 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; }
static inline bool debug_guardpage_enabled(void) { return false; }
static inline bool page_is_guard(struct page *page) { return false; }
static inline bool set_page_guard(struct zone *zone, struct page *page,
- unsigned int order, int migratetype) { return false; }
+ unsigned int order) { return false; }
static inline void clear_page_guard(struct zone *zone, struct page *page,
- unsigned int order, int migratetype) {}
+ unsigned int order) {}
#endif /* CONFIG_DEBUG_PAGEALLOC */
#ifdef __HAVE_ARCH_GATE_AREA
@@ -3969,7 +4028,6 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue_kick(int cpu);
extern int unpoison_memory(unsigned long pfn);
-extern void shake_page(struct page *p);
extern atomic_long_t num_poisoned_pages __read_mostly;
extern int soft_offline_page(unsigned long pfn, int flags);
#ifdef CONFIG_MEMORY_FAILURE
@@ -4202,4 +4260,7 @@ static inline bool pfn_is_unaccepted_memory(unsigned long pfn)
return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE);
}
+void vma_pgtable_walk_begin(struct vm_area_struct *vma);
+void vma_pgtable_walk_end(struct vm_area_struct *vma);
+
#endif /* _LINUX_MM_H */