path: root/include/linux/page-flags.h
Diffstat (limited to 'include/linux/page-flags.h')
-rw-r--r--  include/linux/page-flags.h  1213
1 file changed, 963 insertions(+), 250 deletions(-)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6d53675c2b54..f7a0e4af0c73 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Macros for manipulating and testing page->flags
*/
@@ -16,8 +17,32 @@
/*
* Various page->flags bits:
*
- * PG_reserved is set for special pages, which can never be swapped out. Some
- * of them might not even exist (eg empty_bad_page)...
+ * PG_reserved is set for special pages. The "struct page" of such a page
+ * should in general not be touched (e.g. set dirty) except by its owner.
+ * Pages marked as PG_reserved include:
+ * - Pages part of the kernel image (including vDSO) and similar (e.g. BIOS,
+ * initrd, HW tables)
+ * - Pages reserved or allocated early during boot (before the page allocator
+ * was initialized). This includes (depending on the architecture) the
+ * initial vmemmap, initial page tables, crashkernel, elfcorehdr, and much
+ * much more. Once (if ever) freed, PG_reserved is cleared and they will
+ * be given to the page allocator.
+ * - Pages falling into physical memory gaps - not IORESOURCE_SYSRAM. Trying
+ * to read/write these pages might end badly. Don't touch!
+ * - The zero page(s)
+ * - Pages allocated in the context of kexec/kdump (loaded kernel image,
+ * control pages, vmcoreinfo)
+ * - MMIO/DMA pages. Some architectures don't allow ioremapping pages that are
+ * not marked PG_reserved (as they might be in use by somebody else who does
+ * not respect the caching strategy).
+ * - MCA pages on ia64
+ * - Pages holding CPU notes for POWER Firmware Assisted Dump
+ * - Device memory (e.g. PMEM, DAX, HMM)
+ * Some PG_reserved pages will be excluded from the hibernation image.
+ * In general, PG_reserved does not prevent dumping or swapping and is no
+ * longer required for remap_pfn_range(); ioremap might require it.
+ * Consequently, PG_reserved for a page mapped into user space can indicate
+ * the zero page, the vDSO, MMIO pages or device memory.
*
* The PG_private bitflag is set on pagecache pages if they contain filesystem
* specific data (which is normally at page->private). It can be used by
@@ -33,31 +58,25 @@
* page_waitqueue(page) is a wait queue of all tasks waiting for the page
* to become unlocked.
*
- * PG_uptodate tells whether the page's contents is valid. When a read
- * completes, the page becomes uptodate, unless a disk I/O error happened.
+ * PG_swapbacked is set when a page uses swap as backing storage. These are
+ * usually PageAnon or shmem pages, but note that even anonymous pages may
+ * lose their PG_swapbacked flag when they can simply be dropped (e.g. as a
+ * result of MADV_FREE).
*
* PG_referenced, PG_reclaim are used for page reclaim for anonymous and
* file-backed pagecache (see mm/vmscan.c).
*
- * PG_error is set to indicate that an I/O error occurred on this page.
- *
* PG_arch_1 is an architecture specific page state bit. The generic code
* guarantees that this bit is cleared for a page when it first is entered into
* the page cache.
*
- * PG_highmem pages are not permanently mapped into the kernel virtual address
- * space, they need to be kmapped separately for doing IO on the pages. The
- * struct page (these bits with information) are always mapped into kernel
- * address space...
- *
* PG_hwpoison indicates that a page got corrupted in hardware and contains
* data with incorrect ECC bits that triggered a machine check. Accessing is
* not safe since it may cause another machine check. Don't touch!
*/
/*
- * Don't use the *_dontuse flags. Use the macros. Otherwise you'll break
- * locked- and dirty-page accounting.
+ * Don't use the pageflags directly. Use the PageFoo macros.
*
* The page flags field is split into two parts, the main flags area
* which extends from the low bits upwards, and the fields area which
@@ -73,47 +92,65 @@
*/
enum pageflags {
PG_locked, /* Page is locked. Don't touch. */
- PG_error,
+ PG_writeback, /* Page is under writeback */
PG_referenced,
PG_uptodate,
PG_dirty,
PG_lru,
+ PG_head, /* Must be in bit 6 */
+ PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
PG_active,
- PG_slab,
- PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/
+ PG_workingset,
+ PG_owner_priv_1, /* Owner use. If pagecache, fs may use */
+ PG_owner_2, /* Owner use. If pagecache, fs may use */
PG_arch_1,
PG_reserved,
PG_private, /* If pagecache, has fs-private data */
PG_private_2, /* If pagecache, has fs aux data */
- PG_writeback, /* Page is under writeback */
-#ifdef CONFIG_PAGEFLAGS_EXTENDED
- PG_head, /* A head page */
- PG_tail, /* A tail page */
-#else
- PG_compound, /* A compound page */
-#endif
- PG_swapcache, /* Swap page: swp_entry_t in private */
- PG_mappedtodisk, /* Has blocks allocated on-disk */
PG_reclaim, /* To be reclaimed asap */
PG_swapbacked, /* Page is backed by RAM/swap */
PG_unevictable, /* Page is "unevictable" */
+ PG_dropbehind, /* drop pages on IO completion */
#ifdef CONFIG_MMU
PG_mlocked, /* Page is vma mlocked */
#endif
-#ifdef CONFIG_ARCH_USES_PG_UNCACHED
- PG_uncached, /* Page has been mapped as uncached */
-#endif
#ifdef CONFIG_MEMORY_FAILURE
PG_hwpoison, /* hardware poisoned page. Don't touch */
#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- PG_compound_lock,
+#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
+ PG_young,
+ PG_idle,
+#endif
+#ifdef CONFIG_ARCH_USES_PG_ARCH_2
+ PG_arch_2,
+#endif
+#ifdef CONFIG_ARCH_USES_PG_ARCH_3
+ PG_arch_3,
#endif
__NR_PAGEFLAGS,
- /* Filesystems */
+ PG_readahead = PG_reclaim,
+
+ /* Anonymous memory (and shmem) */
+ PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */
+ /* Some filesystems */
PG_checked = PG_owner_priv_1,
+ /*
+ * Depending on the way an anonymous folio can be mapped into a page
+ * table (e.g., single PMD/PUD/CONT of the head page vs. PTE-mapped
+ * THP), PG_anon_exclusive may be set only for the head page or for
+ * tail pages of an anonymous folio. For now, we only expect it to be
+ * set on tail pages for PTE-mapped THP.
+ */
+ PG_anon_exclusive = PG_owner_2,
+
+ /*
+ * Set if all buffer heads in the folio are mapped.
+ * Filesystems which do not use BHs can use it for their own purpose.
+ */
+ PG_mappedtodisk = PG_owner_2,
+
/* Two page bits are conscripted by FS-Cache to maintain local caching
* state. These bits are set on pages belonging to the netfs's inodes
* when those inodes are being locally cached.
@@ -121,173 +158,640 @@ enum pageflags {
PG_fscache = PG_private_2, /* page backed by cache */
/* XEN */
+ /* Pinned in Xen as a read-only pagetable page. */
PG_pinned = PG_owner_priv_1,
+ /* Pinned as part of domain save (see xen_mm_pin_all()). */
PG_savepinned = PG_dirty,
+ /* Has a grant mapping of another (foreign) domain's page. */
+ PG_foreign = PG_owner_priv_1,
+ /* Remapped by swiotlb-xen. */
+ PG_xen_remapped = PG_owner_priv_1,
+
+#ifdef CONFIG_MIGRATION
+ /* movable_ops page that is isolated for migration */
+ PG_movable_ops_isolated = PG_reclaim,
+ /* this is a movable_ops page (for selected typed pages only) */
+ PG_movable_ops = PG_uptodate,
+#endif
+
+ /* Only valid for buddy pages. Used to track pages that are reported */
+ PG_reported = PG_uptodate,
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /* For self-hosted memmap pages */
+ PG_vmemmap_self_hosted = PG_owner_priv_1,
+#endif
+
+ /*
+ * Flags only valid for compound pages. Stored in first tail page's
+ * flags word. Cannot use the first 8 flags or any flag marked as
+ * PF_ANY.
+ */
- /* SLOB */
- PG_slob_free = PG_private,
+ /* At least one page in this folio has the hwpoison flag set */
+ PG_has_hwpoisoned = PG_active,
+ PG_large_rmappable = PG_workingset, /* anon or file-backed */
+ PG_partially_mapped = PG_reclaim, /* was identified to be partially mapped */
};
+#define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1)
+
#ifndef __GENERATING_BOUNDS_H
+#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
+DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key);
+
/*
- * Macros to create function definitions for page flags
+ * Return the real head page struct iff the @page is a fake head page, otherwise
+ * return the @page itself. See Documentation/mm/vmemmap_dedup.rst.
*/
-#define TESTPAGEFLAG(uname, lname) \
-static inline int Page##uname(const struct page *page) \
- { return test_bit(PG_##lname, &page->flags); }
+static __always_inline const struct page *page_fixed_fake_head(const struct page *page)
+{
+ if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key))
+ return page;
+
+ /*
+ * Only a PAGE_SIZE-aligned struct page address may be a fake head
+ * struct page. The alignment check avoids accessing the fields
+ * (e.g. compound_head) of @page[1], which in turn avoids touching a
+ * (possibly) cold cacheline in some cases.
+ */
+ if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) &&
+ test_bit(PG_head, &page->flags.f)) {
+ /*
+ * We can safely access the fields of @page[1] when PG_head is set,
+ * because the @page is then a compound page composed of at least
+ * two contiguous pages.
+ */
+ unsigned long head = READ_ONCE(page[1].compound_head);
+
+ if (likely(head & 1))
+ return (const struct page *)(head - 1);
+ }
+ return page;
+}
+
+static __always_inline bool page_count_writable(const struct page *page, int u)
+{
+ if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key))
+ return true;
+
+ /*
+ * The refcount check is ordered before the fake-head check to prevent
+ * the following race:
+ * CPU 1 (HVO) CPU 2 (speculative PFN walker)
+ *
+ * page_ref_freeze()
+ * synchronize_rcu()
+ * rcu_read_lock()
+ * page_is_fake_head() is false
+ * vmemmap_remap_pte()
+ * XXX: struct page[] becomes r/o
+ *
+ * page_ref_unfreeze()
+ * page_ref_count() is not zero
+ *
+ * atomic_add_unless(&page->_refcount)
+ * XXX: try to modify r/o struct page[]
+ *
+ * The refcount check also prevents modification attempts to other (r/o)
+ * tail pages that are not fake heads.
+ */
+ if (atomic_read_acquire(&page->_refcount) == u)
+ return false;
+
+ return page_fixed_fake_head(page) == page;
+}
+#else
+static inline const struct page *page_fixed_fake_head(const struct page *page)
+{
+ return page;
+}
-#define SETPAGEFLAG(uname, lname) \
-static inline void SetPage##uname(struct page *page) \
- { set_bit(PG_##lname, &page->flags); }
+static inline bool page_count_writable(const struct page *page, int u)
+{
+ return true;
+}
+#endif
+
+static __always_inline int page_is_fake_head(const struct page *page)
+{
+ return page_fixed_fake_head(page) != page;
+}
+
+static __always_inline unsigned long _compound_head(const struct page *page)
+{
+ unsigned long head = READ_ONCE(page->compound_head);
+
+ if (unlikely(head & 1))
+ return head - 1;
+ return (unsigned long)page_fixed_fake_head(page);
+}
+
+#define compound_head(page) ((typeof(page))_compound_head(page))
+
+/**
+ * page_folio - Converts from page to folio.
+ * @p: The page.
+ *
+ * Every page is part of a folio. This function cannot be called on a
+ * NULL pointer.
+ *
+ * Context: No reference, nor lock is required on @page. If the caller
+ * does not hold a reference, this call may race with a folio split, so
+ * it should re-check the folio still contains this page after gaining
+ * a reference on the folio.
+ * Return: The folio which contains this page.
+ */
+#define page_folio(p) (_Generic((p), \
+ const struct page *: (const struct folio *)_compound_head(p), \
+ struct page *: (struct folio *)_compound_head(p)))
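/*
 * Illustrative sketch (not part of this header) of the re-check pattern
 * the comment above describes. folio_try_get()/folio_put() are real
 * helpers; the wrapper function itself is made up.
 */
static inline struct folio *example_get_stable_folio(struct page *page)
{
	struct folio *folio;

retry:
	folio = page_folio(page);
	if (!folio_try_get(folio))
		return NULL;		/* folio was being freed */
	if (unlikely(page_folio(page) != folio)) {
		/* raced with a folio split; drop the ref and retry */
		folio_put(folio);
		goto retry;
	}
	return folio;
}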
-#define CLEARPAGEFLAG(uname, lname) \
-static inline void ClearPage##uname(struct page *page) \
- { clear_bit(PG_##lname, &page->flags); }
+/**
+ * folio_page - Return a page from a folio.
+ * @folio: The folio.
+ * @n: The page number to return.
+ *
+ * @n is relative to the start of the folio. This function does not
+ * check that the page number lies within @folio; the caller is presumed
+ * to have a reference to the page.
+ */
+#define folio_page(folio, n) (&(folio)->page + (n))
-#define __SETPAGEFLAG(uname, lname) \
-static inline void __SetPage##uname(struct page *page) \
- { __set_bit(PG_##lname, &page->flags); }
+static __always_inline int PageTail(const struct page *page)
+{
+ return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page);
+}
-#define __CLEARPAGEFLAG(uname, lname) \
-static inline void __ClearPage##uname(struct page *page) \
- { __clear_bit(PG_##lname, &page->flags); }
+static __always_inline int PageCompound(const struct page *page)
+{
+ return test_bit(PG_head, &page->flags.f) ||
+ READ_ONCE(page->compound_head) & 1;
+}
-#define TESTSETFLAG(uname, lname) \
-static inline int TestSetPage##uname(struct page *page) \
- { return test_and_set_bit(PG_##lname, &page->flags); }
+#define PAGE_POISON_PATTERN -1l
+static inline int PagePoisoned(const struct page *page)
+{
+ return READ_ONCE(page->flags.f) == PAGE_POISON_PATTERN;
+}
-#define TESTCLEARFLAG(uname, lname) \
-static inline int TestClearPage##uname(struct page *page) \
- { return test_and_clear_bit(PG_##lname, &page->flags); }
+#ifdef CONFIG_DEBUG_VM
+void page_init_poison(struct page *page, size_t size);
+#else
+static inline void page_init_poison(struct page *page, size_t size)
+{
+}
+#endif
-#define __TESTCLEARFLAG(uname, lname) \
-static inline int __TestClearPage##uname(struct page *page) \
- { return __test_and_clear_bit(PG_##lname, &page->flags); }
+static const unsigned long *const_folio_flags(const struct folio *folio,
+ unsigned n)
+{
+ const struct page *page = &folio->page;
-#define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
- SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname)
+ VM_BUG_ON_PGFLAGS(page->compound_head & 1, page);
+ VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page);
+ return &page[n].flags.f;
+}
-#define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
- __SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname)
+static unsigned long *folio_flags(struct folio *folio, unsigned n)
+{
+ struct page *page = &folio->page;
-#define PAGEFLAG_FALSE(uname) \
-static inline int Page##uname(const struct page *page) \
- { return 0; }
+ VM_BUG_ON_PGFLAGS(page->compound_head & 1, page);
+ VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page);
+ return &page[n].flags.f;
+}
-#define TESTSCFLAG(uname, lname) \
- TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
+/*
+ * Page flags policies wrt compound pages
+ *
+ * PF_POISONED_CHECK:
+ * check if this struct page is poisoned/uninitialized.
+ *
+ * PF_ANY:
+ * the page flag is relevant for small, head and tail pages.
+ *
+ * PF_HEAD:
+ * for compound pages, all operations related to the page flag are
+ * applied to the head page.
+ *
+ * PF_NO_TAIL:
+ * modifications of the page flag must be done on small or head pages;
+ * checks can be done on tail pages too.
+ *
+ * PF_NO_COMPOUND:
+ * the page flag is not relevant for compound pages.
+ *
+ * PF_SECOND:
+ * the page flag is stored in the first tail page.
+ */
+#define PF_POISONED_CHECK(page) ({ \
+ VM_BUG_ON_PGFLAGS(PagePoisoned(page), page); \
+ page; })
+#define PF_ANY(page, enforce) PF_POISONED_CHECK(page)
+#define PF_HEAD(page, enforce) PF_POISONED_CHECK(compound_head(page))
+#define PF_NO_TAIL(page, enforce) ({ \
+ VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \
+ PF_POISONED_CHECK(compound_head(page)); })
+#define PF_NO_COMPOUND(page, enforce) ({ \
+ VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \
+ PF_POISONED_CHECK(page); })
+#define PF_SECOND(page, enforce) ({ \
+ VM_BUG_ON_PGFLAGS(!PageHead(page), page); \
+ PF_POISONED_CHECK(&page[1]); })
+
+/* Which page is the flag stored in */
+#define FOLIO_PF_ANY 0
+#define FOLIO_PF_HEAD 0
+#define FOLIO_PF_NO_TAIL 0
+#define FOLIO_PF_NO_COMPOUND 0
+#define FOLIO_PF_SECOND 1
+
+#define FOLIO_HEAD_PAGE 0
+#define FOLIO_SECOND_PAGE 1
-#define SETPAGEFLAG_NOOP(uname) \
+/*
+ * Macros to create function definitions for page flags
+ */
+#define FOLIO_TEST_FLAG(name, page) \
+static __always_inline bool folio_test_##name(const struct folio *folio) \
+{ return test_bit(PG_##name, const_folio_flags(folio, page)); }
+
+#define FOLIO_SET_FLAG(name, page) \
+static __always_inline void folio_set_##name(struct folio *folio) \
+{ set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_CLEAR_FLAG(name, page) \
+static __always_inline void folio_clear_##name(struct folio *folio) \
+{ clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define __FOLIO_SET_FLAG(name, page) \
+static __always_inline void __folio_set_##name(struct folio *folio) \
+{ __set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define __FOLIO_CLEAR_FLAG(name, page) \
+static __always_inline void __folio_clear_##name(struct folio *folio) \
+{ __clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_TEST_SET_FLAG(name, page) \
+static __always_inline bool folio_test_set_##name(struct folio *folio) \
+{ return test_and_set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_TEST_CLEAR_FLAG(name, page) \
+static __always_inline bool folio_test_clear_##name(struct folio *folio) \
+{ return test_and_clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_FLAG(name, page) \
+FOLIO_TEST_FLAG(name, page) \
+FOLIO_SET_FLAG(name, page) \
+FOLIO_CLEAR_FLAG(name, page)
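/*
 * For illustration (not part of the header): FOLIO_FLAG(active,
 * FOLIO_HEAD_PAGE), used further below, expands to roughly:
 *
 *	static __always_inline bool folio_test_active(const struct folio *folio)
 *	{ return test_bit(PG_active, const_folio_flags(folio, 0)); }
 *	static __always_inline void folio_set_active(struct folio *folio)
 *	{ set_bit(PG_active, folio_flags(folio, 0)); }
 *	static __always_inline void folio_clear_active(struct folio *folio)
 *	{ clear_bit(PG_active, folio_flags(folio, 0)); }
 */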
+
+#define TESTPAGEFLAG(uname, lname, policy) \
+FOLIO_TEST_FLAG(lname, FOLIO_##policy) \
+static __always_inline int Page##uname(const struct page *page) \
+{ return test_bit(PG_##lname, &policy(page, 0)->flags.f); }
+
+#define SETPAGEFLAG(uname, lname, policy) \
+FOLIO_SET_FLAG(lname, FOLIO_##policy) \
+static __always_inline void SetPage##uname(struct page *page) \
+{ set_bit(PG_##lname, &policy(page, 1)->flags.f); }
+
+#define CLEARPAGEFLAG(uname, lname, policy) \
+FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \
+static __always_inline void ClearPage##uname(struct page *page) \
+{ clear_bit(PG_##lname, &policy(page, 1)->flags.f); }
+
+#define __SETPAGEFLAG(uname, lname, policy) \
+__FOLIO_SET_FLAG(lname, FOLIO_##policy) \
+static __always_inline void __SetPage##uname(struct page *page) \
+{ __set_bit(PG_##lname, &policy(page, 1)->flags.f); }
+
+#define __CLEARPAGEFLAG(uname, lname, policy) \
+__FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \
+static __always_inline void __ClearPage##uname(struct page *page) \
+{ __clear_bit(PG_##lname, &policy(page, 1)->flags.f); }
+
+#define TESTSETFLAG(uname, lname, policy) \
+FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy) \
+static __always_inline int TestSetPage##uname(struct page *page) \
+{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags.f); }
+
+#define TESTCLEARFLAG(uname, lname, policy) \
+FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy) \
+static __always_inline int TestClearPage##uname(struct page *page) \
+{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags.f); }
+
+#define PAGEFLAG(uname, lname, policy) \
+ TESTPAGEFLAG(uname, lname, policy) \
+ SETPAGEFLAG(uname, lname, policy) \
+ CLEARPAGEFLAG(uname, lname, policy)
+
+#define __PAGEFLAG(uname, lname, policy) \
+ TESTPAGEFLAG(uname, lname, policy) \
+ __SETPAGEFLAG(uname, lname, policy) \
+ __CLEARPAGEFLAG(uname, lname, policy)
+
+#define TESTSCFLAG(uname, lname, policy) \
+ TESTSETFLAG(uname, lname, policy) \
+ TESTCLEARFLAG(uname, lname, policy)
+
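/*
 * Likewise for the page-level wrappers (illustration only):
 * PAGEFLAG(Dirty, dirty, PF_HEAD), declared further below, generates
 * folio_test/set/clear_dirty() plus, roughly:
 *
 *	static __always_inline int PageDirty(const struct page *page)
 *	{ return test_bit(PG_dirty, &PF_HEAD(page, 0)->flags.f); }
 *
 * i.e. with the PF_HEAD policy the bit is always tested and modified
 * on compound_head(page), after the PagePoisoned() sanity check.
 */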
+#define FOLIO_TEST_FLAG_FALSE(name) \
+static inline bool folio_test_##name(const struct folio *folio) \
+{ return false; }
+#define FOLIO_SET_FLAG_NOOP(name) \
+static inline void folio_set_##name(struct folio *folio) { }
+#define FOLIO_CLEAR_FLAG_NOOP(name) \
+static inline void folio_clear_##name(struct folio *folio) { }
+#define __FOLIO_SET_FLAG_NOOP(name) \
+static inline void __folio_set_##name(struct folio *folio) { }
+#define __FOLIO_CLEAR_FLAG_NOOP(name) \
+static inline void __folio_clear_##name(struct folio *folio) { }
+#define FOLIO_TEST_SET_FLAG_FALSE(name) \
+static inline bool folio_test_set_##name(struct folio *folio) \
+{ return false; }
+#define FOLIO_TEST_CLEAR_FLAG_FALSE(name) \
+static inline bool folio_test_clear_##name(struct folio *folio) \
+{ return false; }
+
+#define FOLIO_FLAG_FALSE(name) \
+FOLIO_TEST_FLAG_FALSE(name) \
+FOLIO_SET_FLAG_NOOP(name) \
+FOLIO_CLEAR_FLAG_NOOP(name)
+
+#define TESTPAGEFLAG_FALSE(uname, lname) \
+FOLIO_TEST_FLAG_FALSE(lname) \
+static inline int Page##uname(const struct page *page) { return 0; }
+
+#define SETPAGEFLAG_NOOP(uname, lname) \
+FOLIO_SET_FLAG_NOOP(lname) \
static inline void SetPage##uname(struct page *page) { }
-#define CLEARPAGEFLAG_NOOP(uname) \
+#define CLEARPAGEFLAG_NOOP(uname, lname) \
+FOLIO_CLEAR_FLAG_NOOP(lname) \
static inline void ClearPage##uname(struct page *page) { }
-#define __CLEARPAGEFLAG_NOOP(uname) \
+#define __CLEARPAGEFLAG_NOOP(uname, lname) \
+__FOLIO_CLEAR_FLAG_NOOP(lname) \
static inline void __ClearPage##uname(struct page *page) { }
-#define TESTCLEARFLAG_FALSE(uname) \
-static inline int TestClearPage##uname(struct page *page) { return 0; }
-
-#define __TESTCLEARFLAG_FALSE(uname) \
-static inline int __TestClearPage##uname(struct page *page) { return 0; }
-
-struct page; /* forward declaration */
+#define TESTSETFLAG_FALSE(uname, lname) \
+FOLIO_TEST_SET_FLAG_FALSE(lname) \
+static inline int TestSetPage##uname(struct page *page) { return 0; }
-TESTPAGEFLAG(Locked, locked)
-PAGEFLAG(Error, error) TESTCLEARFLAG(Error, error)
-PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
-PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
-PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
-PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
- TESTCLEARFLAG(Active, active)
-__PAGEFLAG(Slab, slab)
-PAGEFLAG(Checked, checked) /* Used by some filesystems */
-PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */
-PAGEFLAG(SavePinned, savepinned); /* Xen */
-PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
-PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
+#define TESTCLEARFLAG_FALSE(uname, lname) \
+FOLIO_TEST_CLEAR_FLAG_FALSE(lname) \
+static inline int TestClearPage##uname(struct page *page) { return 0; }
-__PAGEFLAG(SlobFree, slob_free)
+#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \
+ SETPAGEFLAG_NOOP(uname, lname) CLEARPAGEFLAG_NOOP(uname, lname)
+
+#define TESTSCFLAG_FALSE(uname, lname) \
+ TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname)
+
+__PAGEFLAG(Locked, locked, PF_NO_TAIL)
+FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE)
+FOLIO_FLAG(referenced, FOLIO_HEAD_PAGE)
+ FOLIO_TEST_CLEAR_FLAG(referenced, FOLIO_HEAD_PAGE)
+ __FOLIO_SET_FLAG(referenced, FOLIO_HEAD_PAGE)
+PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD)
+ __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD)
+PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
+ TESTCLEARFLAG(LRU, lru, PF_HEAD)
+FOLIO_FLAG(active, FOLIO_HEAD_PAGE)
+ __FOLIO_CLEAR_FLAG(active, FOLIO_HEAD_PAGE)
+ FOLIO_TEST_CLEAR_FLAG(active, FOLIO_HEAD_PAGE)
+PAGEFLAG(Workingset, workingset, PF_HEAD)
+ TESTCLEARFLAG(Workingset, workingset, PF_HEAD)
+PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */
+
+/* Xen */
+PAGEFLAG(Pinned, pinned, PF_NO_COMPOUND)
+ TESTSCFLAG(Pinned, pinned, PF_NO_COMPOUND)
+PAGEFLAG(SavePinned, savepinned, PF_NO_COMPOUND);
+PAGEFLAG(Foreign, foreign, PF_NO_COMPOUND);
+PAGEFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND)
+ TESTCLEARFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND)
+
+PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
+ __CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
+ __SETPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
+FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE)
+ __FOLIO_CLEAR_FLAG(swapbacked, FOLIO_HEAD_PAGE)
+ __FOLIO_SET_FLAG(swapbacked, FOLIO_HEAD_PAGE)
/*
* Private page markings that may be used by the filesystem that owns the page
* for its own purposes.
- * - PG_private and PG_private_2 cause releasepage() and co to be invoked
+ * - PG_private and PG_private_2 cause release_folio() and co to be invoked
*/
-PAGEFLAG(Private, private) __SETPAGEFLAG(Private, private)
- __CLEARPAGEFLAG(Private, private)
-PAGEFLAG(Private2, private_2) TESTSCFLAG(Private2, private_2)
-PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1)
+PAGEFLAG(Private, private, PF_ANY)
+FOLIO_FLAG(private_2, FOLIO_HEAD_PAGE)
+
+/* owner_2 can be set on tail pages for anon memory */
+FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE)
/*
* Only test-and-set exist for PG_writeback. The unconditional operators are
* risky: they bypass page accounting.
*/
-TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
-PAGEFLAG(MappedToDisk, mappedtodisk)
+TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL)
+ TESTSCFLAG(Writeback, writeback, PF_NO_TAIL)
+FOLIO_FLAG(mappedtodisk, FOLIO_HEAD_PAGE)
-/* PG_readahead is only used for file reads; PG_reclaim is only for writes */
-PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim)
-PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */
+/* PG_readahead is only used for reads; PG_reclaim is only for writes */
+PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL)
+ TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL)
+FOLIO_FLAG(readahead, FOLIO_HEAD_PAGE)
+ FOLIO_TEST_CLEAR_FLAG(readahead, FOLIO_HEAD_PAGE)
+
+FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE)
+ FOLIO_TEST_CLEAR_FLAG(dropbehind, FOLIO_HEAD_PAGE)
+ __FOLIO_SET_FLAG(dropbehind, FOLIO_HEAD_PAGE)
#ifdef CONFIG_HIGHMEM
/*
* Must use a macro here due to header dependency issues. page_zone() is not
* available at this point.
*/
-#define PageHighMem(__p) is_highmem(page_zone(__p))
+#define PageHighMem(__p) is_highmem_idx(page_zonenum(__p))
+#define folio_test_highmem(__f) is_highmem_idx(folio_zonenum(__f))
#else
-PAGEFLAG_FALSE(HighMem)
+PAGEFLAG_FALSE(HighMem, highmem)
#endif
+#define PhysHighMem(__p) (PageHighMem(phys_to_page(__p)))
-#ifdef CONFIG_SWAP
-PAGEFLAG(SwapCache, swapcache)
+/* Does kmap_local_folio() only allow access to one page of the folio? */
+#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP
+#define folio_test_partial_kmap(f) true
#else
-PAGEFLAG_FALSE(SwapCache)
- SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache)
+#define folio_test_partial_kmap(f) folio_test_highmem(f)
#endif
-PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
- TESTCLEARFLAG(Unevictable, unevictable)
+#ifdef CONFIG_SWAP
+static __always_inline bool folio_test_swapcache(const struct folio *folio)
+{
+ return folio_test_swapbacked(folio) &&
+ test_bit(PG_swapcache, const_folio_flags(folio, 0));
+}
-#ifdef CONFIG_MMU
-PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
- TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked)
+FOLIO_SET_FLAG(swapcache, FOLIO_HEAD_PAGE)
+FOLIO_CLEAR_FLAG(swapcache, FOLIO_HEAD_PAGE)
#else
-PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked)
- TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
+FOLIO_FLAG_FALSE(swapcache)
#endif
-#ifdef CONFIG_ARCH_USES_PG_UNCACHED
-PAGEFLAG(Uncached, uncached)
+FOLIO_FLAG(unevictable, FOLIO_HEAD_PAGE)
+ __FOLIO_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE)
+ FOLIO_TEST_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE)
+
+#ifdef CONFIG_MMU
+FOLIO_FLAG(mlocked, FOLIO_HEAD_PAGE)
+ __FOLIO_CLEAR_FLAG(mlocked, FOLIO_HEAD_PAGE)
+ FOLIO_TEST_CLEAR_FLAG(mlocked, FOLIO_HEAD_PAGE)
+ FOLIO_TEST_SET_FLAG(mlocked, FOLIO_HEAD_PAGE)
#else
-PAGEFLAG_FALSE(Uncached)
+FOLIO_FLAG_FALSE(mlocked)
+ __FOLIO_CLEAR_FLAG_NOOP(mlocked)
+ FOLIO_TEST_CLEAR_FLAG_FALSE(mlocked)
+ FOLIO_TEST_SET_FLAG_FALSE(mlocked)
#endif
#ifdef CONFIG_MEMORY_FAILURE
-PAGEFLAG(HWPoison, hwpoison)
-TESTSCFLAG(HWPoison, hwpoison)
+PAGEFLAG(HWPoison, hwpoison, PF_ANY)
+TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
#define __PG_HWPOISON (1UL << PG_hwpoison)
#else
-PAGEFLAG_FALSE(HWPoison)
+PAGEFLAG_FALSE(HWPoison, hwpoison)
#define __PG_HWPOISON 0
#endif
-u64 stable_page_flags(struct page *page);
+#ifdef CONFIG_PAGE_IDLE_FLAG
+#ifdef CONFIG_64BIT
+FOLIO_TEST_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_SET_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_TEST_CLEAR_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_FLAG(idle, FOLIO_HEAD_PAGE)
+#endif
+/* See page_idle.h for !64BIT workaround */
+#else /* !CONFIG_PAGE_IDLE_FLAG */
+FOLIO_FLAG_FALSE(young)
+FOLIO_TEST_CLEAR_FLAG_FALSE(young)
+FOLIO_FLAG_FALSE(idle)
+#endif
+
+/*
+ * PageReported() is used to track reported free pages within the Buddy
+ * allocator. We can use the non-atomic version of the test and set
+ * operations as both should be shielded with the zone lock to prevent
+ * any possible races on the setting or clearing of the bit.
+ */
+__PAGEFLAG(Reported, reported, PF_NO_COMPOUND)
-static inline int PageUptodate(struct page *page)
+#ifdef CONFIG_MEMORY_HOTPLUG
+PAGEFLAG(VmemmapSelfHosted, vmemmap_self_hosted, PF_ANY)
+#else
+PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
+#endif
+
+/*
+ * On an anonymous folio mapped into a user virtual memory area,
+ * folio->mapping points to its anon_vma, not to a struct address_space;
+ * with the FOLIO_MAPPING_ANON bit set to distinguish it. See rmap.h.
+ *
+ * On an anonymous folio in a VM_MERGEABLE area, if CONFIG_KSM is enabled,
+ * the FOLIO_MAPPING_ANON_KSM bit may be set along with the FOLIO_MAPPING_ANON
+ * bit; and then folio->mapping points, not to an anon_vma, but to a private
+ * structure which KSM associates with that merged folio. See ksm.h.
+ *
+ * Please note that, confusingly, "folio_mapping" refers to the inode
+ * address_space which maps the folio from disk; whereas "folio_mapped"
+ * refers to user virtual address space into which the folio is mapped.
+ *
+ * For slab pages, since slab reuses the bits in struct page to store its
+ * internal states, the folio->mapping does not exist as such, nor do
+ * these flags below. So in order to avoid testing non-existent bits,
+ * please make sure that folio_test_slab(folio) actually evaluates to
+ * false before calling the following functions (e.g., folio_test_anon).
+ * See mm/slab.h.
+ */
+#define FOLIO_MAPPING_ANON 0x1
+#define FOLIO_MAPPING_ANON_KSM 0x2
+#define FOLIO_MAPPING_KSM (FOLIO_MAPPING_ANON | FOLIO_MAPPING_ANON_KSM)
+#define FOLIO_MAPPING_FLAGS (FOLIO_MAPPING_ANON | FOLIO_MAPPING_ANON_KSM)
+
+static __always_inline bool folio_test_anon(const struct folio *folio)
{
- int ret = test_bit(PG_uptodate, &(page)->flags);
+ return ((unsigned long)folio->mapping & FOLIO_MAPPING_ANON) != 0;
+}
+
+static __always_inline bool PageAnonNotKsm(const struct page *page)
+{
+ unsigned long flags = (unsigned long)page_folio(page)->mapping;
+ return (flags & FOLIO_MAPPING_FLAGS) == FOLIO_MAPPING_ANON;
+}
+
+static __always_inline bool PageAnon(const struct page *page)
+{
+ return folio_test_anon(page_folio(page));
+}
+#ifdef CONFIG_KSM
+/*
+ * A KSM page is one of those write-protected "shared pages" or "merged pages"
+ * which KSM maps into multiple mms, wherever identical anonymous page content
+ * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any
+ * anon_vma, but to that page's node of the stable tree.
+ */
+static __always_inline bool folio_test_ksm(const struct folio *folio)
+{
+ return ((unsigned long)folio->mapping & FOLIO_MAPPING_FLAGS) ==
+ FOLIO_MAPPING_KSM;
+}
+#else
+FOLIO_TEST_FLAG_FALSE(ksm)
+#endif
+
+u64 stable_page_flags(const struct page *page);
+
+/**
+ * folio_xor_flags_has_waiters - Change some folio flags.
+ * @folio: The folio.
+ * @mask: Bits set in this word will be changed.
+ *
+ * This must only be used for flags which are changed with the folio
+ * lock held. For example, it is unsafe to use for PG_dirty as that
+ * can be set without the folio lock held. It can also only be used
+ * on flags which are in the range 0-6 as some of the implementations
+ * only affect those bits.
+ *
+ * Return: Whether there are tasks waiting on the folio.
+ */
+static inline bool folio_xor_flags_has_waiters(struct folio *folio,
+ unsigned long mask)
+{
+ return xor_unlock_is_negative_byte(mask, folio_flags(folio, 0));
+}
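/*
 * Sketch of the intended use, roughly what folio_unlock() in
 * mm/filemap.c does (folio_wake_bit() is declared in mm/internal.h):
 * clear PG_locked and learn from the same atomic op whether PG_waiters
 * (bit 7, in the same byte) was set.
 */
static inline void example_folio_unlock(struct folio *folio)
{
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
	if (folio_xor_flags_has_waiters(folio, 1 << PG_locked))
		folio_wake_bit(folio, PG_locked);
}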
+
+/**
+ * folio_test_uptodate - Is this folio up to date?
+ * @folio: The folio.
+ *
+ * The uptodate flag is set on a folio when every byte in the folio is
+ * at least as new as the corresponding bytes on storage. Anonymous
+ * and CoW folios are always uptodate. If the folio is not uptodate,
+ * some of the bytes in it may be; see the is_partially_uptodate()
+ * address_space operation.
+ */
+static inline bool folio_test_uptodate(const struct folio *folio)
+{
+ bool ret = test_bit(PG_uptodate, const_folio_flags(folio, 0));
/*
- * Must ensure that the data we read out of the page is loaded
- * _after_ we've loaded page->flags to check for PageUptodate.
- * We can skip the barrier if the page is not uptodate, because
+ * Must ensure that the data we read out of the folio is loaded
+ * _after_ we've loaded folio->flags to check the uptodate bit.
+ * We can skip the barrier if the folio is not uptodate, because
* we wouldn't be reading anything from it.
*
- * See SetPageUptodate() for the other side of the story.
+ * See folio_mark_uptodate() for the other side of the story.
*/
if (ret)
smp_rmb();
@@ -295,236 +799,445 @@ static inline int PageUptodate(struct page *page)
return ret;
}
-static inline void __SetPageUptodate(struct page *page)
+static inline bool PageUptodate(const struct page *page)
+{
+ return folio_test_uptodate(page_folio(page));
+}
+
+static __always_inline void __folio_mark_uptodate(struct folio *folio)
{
smp_wmb();
- __set_bit(PG_uptodate, &(page)->flags);
+ __set_bit(PG_uptodate, folio_flags(folio, 0));
}
-static inline void SetPageUptodate(struct page *page)
+static __always_inline void folio_mark_uptodate(struct folio *folio)
{
/*
* Memory barrier must be issued before setting the PG_uptodate bit,
- * so that all previous stores issued in order to bring the page
- * uptodate are actually visible before PageUptodate becomes true.
+ * so that all previous stores issued in order to bring the folio
+ * uptodate are actually visible before folio_test_uptodate becomes true.
*/
smp_wmb();
- set_bit(PG_uptodate, &(page)->flags);
+ set_bit(PG_uptodate, folio_flags(folio, 0));
}
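/*
 * How the two barriers pair up (illustrative fragments only; assumes a
 * folio whose contents are directly addressable via folio_address()):
 *
 * Writer (e.g. I/O completion):
 *	memcpy(folio_address(folio), src, folio_size(folio));
 *	folio_mark_uptodate(folio);
 *
 * Reader:
 *	if (folio_test_uptodate(folio))
 *		use(folio_address(folio));
 *
 * The smp_wmb() in folio_mark_uptodate() orders the data stores before
 * the PG_uptodate store; the smp_rmb() in folio_test_uptodate() orders
 * the flag load before the subsequent data loads.
 */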
-CLEARPAGEFLAG(Uptodate, uptodate)
+static __always_inline void __SetPageUptodate(struct page *page)
+{
+ __folio_mark_uptodate((struct folio *)page);
+}
-extern void cancel_dirty_page(struct page *page, unsigned int account_size);
+static __always_inline void SetPageUptodate(struct page *page)
+{
+ folio_mark_uptodate((struct folio *)page);
+}
+
+CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL)
+
+void __folio_start_writeback(struct folio *folio, bool keep_write);
+void set_page_writeback(struct page *page);
-int test_clear_page_writeback(struct page *page);
-int test_set_page_writeback(struct page *page);
+#define folio_start_writeback(folio) \
+ __folio_start_writeback(folio, false)
-static inline void set_page_writeback(struct page *page)
+static __always_inline bool folio_test_head(const struct folio *folio)
{
- test_set_page_writeback(page);
+ return test_bit(PG_head, const_folio_flags(folio, FOLIO_PF_ANY));
}
-#ifdef CONFIG_PAGEFLAGS_EXTENDED
-/*
- * System with lots of page flags available. This allows separate
- * flags for PageHead() and PageTail() checks of compound pages so that bit
- * tests can be used in performance sensitive paths. PageCompound is
- * generally not used in hot code paths.
+static __always_inline int PageHead(const struct page *page)
+{
+ PF_POISONED_CHECK(page);
+ return test_bit(PG_head, &page->flags.f) && !page_is_fake_head(page);
+}
+
+__SETPAGEFLAG(Head, head, PF_ANY)
+__CLEARPAGEFLAG(Head, head, PF_ANY)
+CLEARPAGEFLAG(Head, head, PF_ANY)
+
+/**
+ * folio_test_large() - Does this folio contain more than one page?
+ * @folio: The folio to test.
+ *
+ * Return: True if the folio is larger than one page.
*/
-__PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head)
-__PAGEFLAG(Tail, tail)
+static inline bool folio_test_large(const struct folio *folio)
+{
+ return folio_test_head(folio);
+}
-static inline int PageCompound(struct page *page)
+static __always_inline void set_compound_head(struct page *page, struct page *head)
{
- return page->flags & ((1L << PG_head) | (1L << PG_tail));
+ WRITE_ONCE(page->compound_head, (unsigned long)head + 1);
+}
+static __always_inline void clear_compound_head(struct page *page)
+{
+ WRITE_ONCE(page->compound_head, 0);
}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void ClearPageCompound(struct page *page)
{
BUG_ON(!PageHead(page));
ClearPageHead(page);
}
-#endif
+FOLIO_FLAG(large_rmappable, FOLIO_SECOND_PAGE)
+FOLIO_FLAG(partially_mapped, FOLIO_SECOND_PAGE)
#else
+FOLIO_FLAG_FALSE(large_rmappable)
+FOLIO_FLAG_FALSE(partially_mapped)
+#endif
+
+#define PG_head_mask ((1UL << PG_head))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
- * Reduce page flag use as much as possible by overlapping
- * compound page flags with the flags used for page cache pages. Possible
- * because PageCompound is always set for compound pages and not for
- * pages on the LRU and/or pagecache.
+ * PageTransCompound returns true for both transparent huge pages
+ * and hugetlbfs pages, so it should only be called when it's known
+ * that hugetlbfs pages aren't involved.
*/
-TESTPAGEFLAG(Compound, compound)
-__SETPAGEFLAG(Head, compound) __CLEARPAGEFLAG(Head, compound)
+static inline int PageTransCompound(const struct page *page)
+{
+ return PageCompound(page);
+}
+#else
+TESTPAGEFLAG_FALSE(TransCompound, transcompound)
+#endif
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
/*
- * PG_reclaim is used in combination with PG_compound to mark the
- * head and tail of a compound page. This saves one page flag
- * but makes it impossible to use compound pages for the page cache.
- * The PG_reclaim bit would have to be used for reclaim or readahead
- * if compound pages enter the page cache.
+ * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the
+ * compound page.
*
- * PG_compound & PG_reclaim => Tail page
- * PG_compound & ~PG_reclaim => Head page
+ * This flag is set by the hwpoison handler and cleared by THP split or
+ * page free.
*/
-#define PG_head_mask ((1L << PG_compound))
-#define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim))
+FOLIO_FLAG(has_hwpoisoned, FOLIO_SECOND_PAGE)
+#else
+FOLIO_FLAG_FALSE(has_hwpoisoned)
+#endif
-static inline int PageHead(struct page *page)
+/*
+ * For pages that do not use mapcount, page_type may be used.
+ * The low 24 bits of page_type may be used for your own purposes, as long
+ * as you are careful not to affect the top 8 bits. The low bits of
+ * page_type will be overwritten when you clear the page_type from the page.
+ */
+enum pagetype {
+ /* 0x00-0x7f are positive numbers, ie mapcount */
+ /* Reserve 0x80-0xef for mapcount overflow. */
+ PGTY_buddy = 0xf0,
+ PGTY_offline = 0xf1,
+ PGTY_table = 0xf2,
+ PGTY_guard = 0xf3,
+ PGTY_hugetlb = 0xf4,
+ PGTY_slab = 0xf5,
+ PGTY_zsmalloc = 0xf6,
+ PGTY_unaccepted = 0xf7,
+ PGTY_large_kmalloc = 0xf8,
+
+ PGTY_mapcount_underflow = 0xff
+};
+
+static inline bool page_type_has_type(int page_type)
{
- return ((page->flags & PG_head_tail_mask) == PG_head_mask);
+ return page_type < (PGTY_mapcount_underflow << 24);
}
-static inline int PageTail(struct page *page)
+/* This takes a mapcount which is one more than page->_mapcount */
+static inline bool page_mapcount_is_type(unsigned int mapcount)
{
- return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask);
+ return page_type_has_type(mapcount - 1);
}
-static inline void __SetPageTail(struct page *page)
+static inline bool page_has_type(const struct page *page)
{
- page->flags |= PG_head_tail_mask;
+ return page_type_has_type(data_race(page->page_type));
}
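/*
 * Worked example (not part of the header): a page-table page has
 * page->page_type == (unsigned int)PGTY_table << 24 == 0xf2000000.
 * In page_type_has_type()'s signed comparison, 0xf2000000 is less than
 * PGTY_mapcount_underflow << 24 (0xff000000), so the page has a type.
 * An untyped page keeps page_type == UINT_MAX (-1 as an int), which
 * fails the comparison.
 */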
-static inline void __ClearPageTail(struct page *page)
-{
- page->flags &= ~PG_head_tail_mask;
+#define FOLIO_TYPE_OPS(lname, fname) \
+static __always_inline bool folio_test_##fname(const struct folio *folio) \
+{ \
+ return data_race(folio->page.page_type >> 24) == PGTY_##lname; \
+} \
+static __always_inline void __folio_set_##fname(struct folio *folio) \
+{ \
+ if (folio_test_##fname(folio)) \
+ return; \
+ VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \
+ folio); \
+ folio->page.page_type = (unsigned int)PGTY_##lname << 24; \
+} \
+static __always_inline void __folio_clear_##fname(struct folio *folio) \
+{ \
+ if (folio->page.page_type == UINT_MAX) \
+ return; \
+ VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \
+ folio->page.page_type = UINT_MAX; \
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline void ClearPageCompound(struct page *page)
-{
- BUG_ON((page->flags & PG_head_tail_mask) != (1 << PG_compound));
- clear_bit(PG_compound, &page->flags);
+#define PAGE_TYPE_OPS(uname, lname, fname) \
+FOLIO_TYPE_OPS(lname, fname) \
+static __always_inline int Page##uname(const struct page *page) \
+{ \
+ return data_race(page->page_type >> 24) == PGTY_##lname; \
+} \
+static __always_inline void __SetPage##uname(struct page *page) \
+{ \
+ if (Page##uname(page)) \
+ return; \
+ VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \
+ page->page_type = (unsigned int)PGTY_##lname << 24; \
+} \
+static __always_inline void __ClearPage##uname(struct page *page) \
+{ \
+ if (page->page_type == UINT_MAX) \
+ return; \
+ VM_BUG_ON_PAGE(!Page##uname(page), page); \
+ page->page_type = UINT_MAX; \
}
-#endif
-#endif /* !PAGEFLAGS_EXTENDED */
+/*
+ * PageBuddy() indicates that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ */
+PAGE_TYPE_OPS(Buddy, buddy, buddy)
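/*
 * Hedged sketch (modeled on mm/page_alloc.c; the function is made up):
 * the buddy type is only set and cleared under the zone lock, which is
 * why the non-atomic __SetPageBuddy()/__ClearPageBuddy() generated
 * above suffice.
 */
static inline void example_mark_free(struct zone *zone, struct page *page,
				     unsigned int order)
{
	lockdep_assert_held(&zone->lock);
	set_page_private(page, order);	/* free-page order lives in ->private */
	__SetPageBuddy(page);
}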
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
- * PageHuge() only returns true for hugetlbfs pages, but not for
- * normal or transparent huge pages.
+ * PageOffline() indicates that the page is logically offline although the
+ * containing section is online (e.g. inflated in a balloon driver, or not
+ * onlined when onlining the section).
+ * The content of these pages is effectively stale. Such pages should not
+ * be touched (read/write/dump/save) except by their owner.
+ *
+ * When a memory block gets onlined, all pages are initialized with a
+ * refcount of 1 and PageOffline(). generic_online_page() will
+ * take care of clearing PageOffline().
+ *
+ * If a driver wants to allow to offline unmovable PageOffline() pages without
+ * putting them back to the buddy, it can do so via the memory notifier by
+ * decrementing the reference count in MEM_GOING_OFFLINE and incrementing the
+ * reference count in MEM_CANCEL_OFFLINE. When offlining, the PageOffline()
+ * pages (now with a reference count of zero) are treated like free (unmanaged)
+ * pages, allowing the containing memory block to get offlined. A driver that
+ * relies on this feature is aware that re-onlining the memory block will
+ * require not giving them to the buddy via generic_online_page().
+ *
+ * Memory offlining code will not adjust the managed page count for any
+ * PageOffline() pages, treating them like they were never exposed to the
+ * buddy using generic_online_page().
*
- * PageTransHuge() returns true for both transparent huge and
- * hugetlbfs pages, but not normal pages. PageTransHuge() can only be
- * called only in the core VM paths where hugetlbfs pages can't exist.
+ * There are drivers that mark a page PageOffline() and expect there won't be
+ * any further access to page content. PFN walkers that read content of random
+ * pages should check PageOffline() and synchronize with such drivers using
+ * page_offline_freeze()/page_offline_thaw().
*/
-static inline int PageTransHuge(struct page *page)
-{
- VM_BUG_ON(PageTail(page));
- return PageHead(page);
-}
+PAGE_TYPE_OPS(Offline, offline, offline)
+
+extern void page_offline_freeze(void);
+extern void page_offline_thaw(void);
+extern void page_offline_begin(void);
+extern void page_offline_end(void);
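/*
 * Sketch of a PFN walker cooperating with PageOffline() drivers as
 * described above (the walker itself is hypothetical;
 * pfn_to_online_page() comes from linux/memory_hotplug.h):
 */
static inline void example_walk_pfns(unsigned long start, unsigned long end)
{
	unsigned long pfn;

	page_offline_freeze();
	for (pfn = start; pfn < end; pfn++) {
		struct page *page = pfn_to_online_page(pfn);

		if (!page || PageOffline(page))
			continue;	/* content is stale or absent */
		/* safe to inspect the page content here */
	}
	page_offline_thaw();
}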
/*
- * PageTransCompound returns true for both transparent huge pages
- * and hugetlbfs pages, so it should only be called when it's known
- * that hugetlbfs pages aren't involved.
+ * Marks pages in use as page tables.
*/
-static inline int PageTransCompound(struct page *page)
-{
- return PageCompound(page);
-}
+PAGE_TYPE_OPS(Table, table, pgtable)
/*
- * PageTransTail returns true for both transparent huge pages
- * and hugetlbfs pages, so it should only be called when it's known
- * that hugetlbfs pages aren't involved.
+ * Marks guardpages used with debug_pagealloc.
*/
-static inline int PageTransTail(struct page *page)
-{
- return PageTail(page);
-}
+PAGE_TYPE_OPS(Guard, guard, guard)
+
+PAGE_TYPE_OPS(Slab, slab, slab)
+#ifdef CONFIG_HUGETLB_PAGE
+FOLIO_TYPE_OPS(hugetlb, hugetlb)
#else
+FOLIO_TEST_FLAG_FALSE(hugetlb)
+#endif
+
+PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
-static inline int PageTransHuge(struct page *page)
+/*
+ * Mark pages that have to be accepted before being touched for the first time.
+ *
+ * Serialized with zone lock.
+ */
+PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted)
+PAGE_TYPE_OPS(LargeKmalloc, large_kmalloc, large_kmalloc)
+
+/**
+ * PageHuge - Determine if the page belongs to hugetlbfs
+ * @page: The page to test.
+ *
+ * Context: Any context.
+ * Return: True for hugetlbfs pages, false for anon pages or pages
+ * belonging to other filesystems.
+ */
+static inline bool PageHuge(const struct page *page)
{
- return 0;
+ return folio_test_hugetlb(page_folio(page));
}
-static inline int PageTransCompound(struct page *page)
+/*
+ * Check if a page is currently marked HWPoisoned. Note that this check is
+ * best effort only and inherently racy: there is no way to synchronize with
+ * failing hardware.
+ */
+static inline bool is_page_hwpoison(const struct page *page)
{
- return 0;
+ const struct folio *folio;
+
+ if (PageHWPoison(page))
+ return true;
+ folio = page_folio(page);
+ return folio_test_hugetlb(folio) && PageHWPoison(&folio->page);
}
-static inline int PageTransTail(struct page *page)
+static inline bool folio_contain_hwpoisoned_page(struct folio *folio)
{
- return 0;
+ return folio_test_hwpoison(folio) ||
+ (folio_test_large(folio) && folio_test_has_hwpoisoned(folio));
}
-#endif
+bool is_free_buddy_page(const struct page *page);
+
+#ifdef CONFIG_MIGRATION
/*
- * If network-based swap is enabled, sl*b must keep track of whether pages
- * were allocated from pfmemalloc reserves.
+ * This page is migratable through movable_ops (for selected typed pages
+ * only).
+ *
+ * Page migration of such pages might fail, for example, if the page is
+ * already isolated by somebody else, or if the page is about to get freed.
+ *
+ * While a subsystem might set selected typed pages that support page migration
+ * as being movable through movable_ops, it must never clear this flag.
+ *
+ * This flag is only cleared when the page is freed back to the buddy.
+ *
+ * Only selected page types support this flag (see page_movable_ops()) and
+ * the flag might be used in other contexts for other pages. Always use
+ * page_has_movable_ops() instead.
+ */
+TESTPAGEFLAG(MovableOps, movable_ops, PF_NO_TAIL);
+SETPAGEFLAG(MovableOps, movable_ops, PF_NO_TAIL);
+/*
+ * A movable_ops page has this flag set while it is isolated for migration.
+ * This flag primarily protects against concurrent migration attempts.
+ *
+ * Once migration ended (success or failure), the flag is cleared. The
+ * flag is managed by the migration core.
+ */
+PAGEFLAG(MovableOpsIsolated, movable_ops_isolated, PF_NO_TAIL);
+#else /* !CONFIG_MIGRATION */
+TESTPAGEFLAG_FALSE(MovableOps, movable_ops);
+SETPAGEFLAG_NOOP(MovableOps, movable_ops);
+PAGEFLAG_FALSE(MovableOpsIsolated, movable_ops_isolated);
+#endif /* CONFIG_MIGRATION */
+
+/**
+ * page_has_movable_ops - test for a movable_ops page
+ * @page: The page to test.
+ *
+ * Test whether this is a movable_ops page. Such pages will stay that
+ * way until freed.
+ *
+ * Returns true if this is a movable_ops page, otherwise false.
*/
-static inline int PageSlabPfmemalloc(struct page *page)
+static inline bool page_has_movable_ops(const struct page *page)
+{
+ return PageMovableOps(page) &&
+ (PageOffline(page) || PageZsmalloc(page));
+}
+
+static __always_inline int PageAnonExclusive(const struct page *page)
{
- VM_BUG_ON(!PageSlab(page));
- return PageActive(page);
+ VM_BUG_ON_PGFLAGS(!PageAnon(page), page);
+ /*
+ * HugeTLB stores this information on the head page; THP keeps it per
+ * page.
+ */
+ if (PageHuge(page))
+ page = compound_head(page);
+ return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f);
}
-static inline void SetPageSlabPfmemalloc(struct page *page)
+static __always_inline void SetPageAnonExclusive(struct page *page)
{
- VM_BUG_ON(!PageSlab(page));
- SetPageActive(page);
+ VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page);
+ VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
+ set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f);
}
-static inline void __ClearPageSlabPfmemalloc(struct page *page)
+static __always_inline void ClearPageAnonExclusive(struct page *page)
{
- VM_BUG_ON(!PageSlab(page));
- __ClearPageActive(page);
+ VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page);
+ VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
+ clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f);
}
-static inline void ClearPageSlabPfmemalloc(struct page *page)
+static __always_inline void __ClearPageAnonExclusive(struct page *page)
{
- VM_BUG_ON(!PageSlab(page));
- ClearPageActive(page);
+ VM_BUG_ON_PGFLAGS(!PageAnon(page), page);
+ VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
+ __clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f);
}
#ifdef CONFIG_MMU
-#define __PG_MLOCKED (1 << PG_mlocked)
+#define __PG_MLOCKED (1UL << PG_mlocked)
#else
#define __PG_MLOCKED 0
#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define __PG_COMPOUND_LOCK (1 << PG_compound_lock)
-#else
-#define __PG_COMPOUND_LOCK 0
-#endif
-
/*
* Flags checked when a page is freed. Pages being freed should not have
- * these flags set. It they are, there is a problem.
+ * these flags set. If they are, there is a problem.
*/
-#define PAGE_FLAGS_CHECK_AT_FREE \
- (1 << PG_lru | 1 << PG_locked | \
- 1 << PG_private | 1 << PG_private_2 | \
- 1 << PG_writeback | 1 << PG_reserved | \
- 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
- 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
- __PG_COMPOUND_LOCK)
+#define PAGE_FLAGS_CHECK_AT_FREE \
+ (1UL << PG_lru | 1UL << PG_locked | \
+ 1UL << PG_private | 1UL << PG_private_2 | \
+ 1UL << PG_writeback | 1UL << PG_reserved | \
+ 1UL << PG_active | \
+ 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK)
/*
* Flags checked when a page is prepped for return by the page allocator.
- * Pages being prepped should not have any flags set. It they are set,
+ * Pages being prepped should not have these flags set. If they are set,
* there has been a kernel bug or struct page corruption.
+ *
+ * __PG_HWPOISON is exceptional because it needs to be kept beyond a page's
+ * alloc-free cycle to prevent the page from being reused.
+ */
+#define PAGE_FLAGS_CHECK_AT_PREP \
+ ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK)
+
+/*
+ * Flags stored in the second page of a compound page. They may overlap
+ * the CHECK_AT_FREE flags above, so need to be cleared.
*/
-#define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1)
+#define PAGE_FLAGS_SECOND \
+ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \
+ 1UL << PG_large_rmappable | 1UL << PG_partially_mapped)
#define PAGE_FLAGS_PRIVATE \
- (1 << PG_private | 1 << PG_private_2)
+ (1UL << PG_private | 1UL << PG_private_2)
/**
- * page_has_private - Determine if page has private stuff
- * @page: The page to be checked
+ * folio_has_private - Determine if folio has private stuff
+ * @folio: The folio to be checked
*
- * Determine if a page has private stuff, indicating that release routines
+ * Determine if a folio has private stuff, indicating that release routines
* should be invoked upon it.
*/
-static inline int page_has_private(struct page *page)
+static inline int folio_has_private(const struct folio *folio)
{
- return !!(page->flags & PAGE_FLAGS_PRIVATE);
+ return !!(folio->flags.f & PAGE_FLAGS_PRIVATE);
}
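/*
 * Typical use (hedged sketch; filemap_release_folio() is the helper
 * that ends up invoking the filesystem's release routines):
 */
static inline bool example_try_strip_private(struct folio *folio)
{
	if (!folio_has_private(folio))
		return true;
	return filemap_release_folio(folio, GFP_KERNEL);
}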
+#undef PF_ANY
+#undef PF_HEAD
+#undef PF_NO_TAIL
+#undef PF_NO_COMPOUND
+#undef PF_SECOND
#endif /* !__GENERATING_BOUNDS_H */
#endif /* PAGE_FLAGS_H */