diff options
Diffstat (limited to 'include/linux/page-flags.h')
-rw-r--r--	include/linux/page-flags.h | 1213
1 file changed, 963 insertions(+), 250 deletions(-)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6d53675c2b54..f7a0e4af0c73 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Macros for manipulating and testing page->flags
  */
@@ -16,8 +17,32 @@
 /*
  * Various page->flags bits:
  *
- * PG_reserved is set for special pages, which can never be swapped out. Some
- * of them might not even exist (eg empty_bad_page)...
+ * PG_reserved is set for special pages. The "struct page" of such a page
+ * should in general not be touched (e.g. set dirty) except by its owner.
+ * Pages marked as PG_reserved include:
+ * - Pages part of the kernel image (including vDSO) and similar (e.g. BIOS,
+ *   initrd, HW tables)
+ * - Pages reserved or allocated early during boot (before the page allocator
+ *   was initialized). This includes (depending on the architecture) the
+ *   initial vmemmap, initial page tables, crashkernel, elfcorehdr, and much
+ *   much more. Once (if ever) freed, PG_reserved is cleared and they will
+ *   be given to the page allocator.
+ * - Pages falling into physical memory gaps - not IORESOURCE_SYSRAM. Trying
+ *   to read/write these pages might end badly. Don't touch!
+ * - The zero page(s)
+ * - Pages allocated in the context of kexec/kdump (loaded kernel image,
+ *   control pages, vmcoreinfo)
+ * - MMIO/DMA pages. Some architectures don't allow ioremapping pages that are
+ *   not marked PG_reserved (as they might be in use by somebody else who does
+ *   not respect the caching strategy).
+ * - MCA pages on ia64
+ * - Pages holding CPU notes for POWER Firmware Assisted Dump
+ * - Device memory (e.g. PMEM, DAX, HMM)
+ * Some PG_reserved pages will be excluded from the hibernation image.
+ * PG_reserved does in general not hinder anybody from dumping or swapping
+ * and is no longer required for remap_pfn_range(). ioremap might require it.
+ * Consequently, PG_reserved for a page mapped into user space can indicate
+ * the zero page, the vDSO, MMIO pages or device memory.
  *
  * The PG_private bitflag is set on pagecache pages if they contain filesystem
  * specific data (which is normally at page->private). It can be used by
@@ -33,31 +58,25 @@
  * page_waitqueue(page) is a wait queue of all tasks waiting for the page
  * to become unlocked.
  *
- * PG_uptodate tells whether the page's contents is valid. When a read
- * completes, the page becomes uptodate, unless a disk I/O error happened.
+ * PG_swapbacked is set when a page uses swap as backing storage. These are
+ * usually PageAnon or shmem pages, but note that even anonymous pages might
+ * lose their PG_swapbacked flag when they can simply be dropped (e.g. as a
+ * result of MADV_FREE).
  *
  * PG_referenced, PG_reclaim are used for page reclaim for anonymous and
  * file-backed pagecache (see mm/vmscan.c).
  *
- * PG_error is set to indicate that an I/O error occurred on this page.
- *
  * PG_arch_1 is an architecture specific page state bit.  The generic code
  * guarantees that this bit is cleared for a page when it first is entered into
  * the page cache.
  *
- * PG_highmem pages are not permanently mapped into the kernel virtual address
- * space, they need to be kmapped separately for doing IO on the pages.  The
- * struct page (these bits with information) are always mapped into kernel
- * address space...
- *
  * PG_hwpoison indicates that a page got corrupted in hardware and contains
  * data with incorrect ECC bits that triggered a machine check. Accessing is
  * not safe since it may cause another machine check. Don't touch!
  */
 
 /*
- * Don't use the *_dontuse flags. Use the macros. Otherwise you'll break
- * locked- and dirty-page accounting.
+ * Don't use the pageflags directly. Use the PageFoo macros.
  *
  * The page flags field is split into two parts, the main flags area
  * which extends from the low bits upwards, and the fields area which
@@ -73,47 +92,65 @@
  */
 enum pageflags {
	PG_locked,		/* Page is locked. Don't touch. */
-	PG_error,
+	PG_writeback,		/* Page is under writeback */
	PG_referenced,
	PG_uptodate,
	PG_dirty,
	PG_lru,
+	PG_head,		/* Must be in bit 6 */
+	PG_waiters,		/* Page has waiters, check its waitqueue.
+				 * Must be bit #7 and in the same byte as
+				 * "PG_locked" */
	PG_active,
-	PG_slab,
-	PG_owner_priv_1,	/* Owner use. If pagecache, fs may use*/
+	PG_workingset,
+	PG_owner_priv_1,	/* Owner use. If pagecache, fs may use */
+	PG_owner_2,		/* Owner use. If pagecache, fs may use */
	PG_arch_1,
	PG_reserved,
	PG_private,		/* If pagecache, has fs-private data */
	PG_private_2,		/* If pagecache, has fs aux data */
-	PG_writeback,		/* Page is under writeback */
-#ifdef CONFIG_PAGEFLAGS_EXTENDED
-	PG_head,		/* A head page */
-	PG_tail,		/* A tail page */
-#else
-	PG_compound,		/* A compound page */
-#endif
-	PG_swapcache,		/* Swap page: swp_entry_t in private */
-	PG_mappedtodisk,	/* Has blocks allocated on-disk */
	PG_reclaim,		/* To be reclaimed asap */
	PG_swapbacked,		/* Page is backed by RAM/swap */
	PG_unevictable,		/* Page is "unevictable" */
+	PG_dropbehind,		/* drop pages on IO completion */
 #ifdef CONFIG_MMU
	PG_mlocked,		/* Page is vma mlocked */
 #endif
-#ifdef CONFIG_ARCH_USES_PG_UNCACHED
-	PG_uncached,		/* Page has been mapped as uncached */
-#endif
 #ifdef CONFIG_MEMORY_FAILURE
	PG_hwpoison,		/* hardware poisoned page. Don't touch */
 #endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	PG_compound_lock,
+#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
+	PG_young,
+	PG_idle,
+#endif
+#ifdef CONFIG_ARCH_USES_PG_ARCH_2
+	PG_arch_2,
+#endif
+#ifdef CONFIG_ARCH_USES_PG_ARCH_3
+	PG_arch_3,
 #endif
	__NR_PAGEFLAGS,
 
-	/* Filesystems */
+	PG_readahead = PG_reclaim,
+
+	/* Anonymous memory (and shmem) */
+	PG_swapcache = PG_owner_priv_1,	/* Swap page: swp_entry_t in private */
+	/* Some filesystems */
	PG_checked = PG_owner_priv_1,
+
+	/*
+	 * Depending on the way an anonymous folio can be mapped into a page
+	 * table (e.g., single PMD/PUD/CONT of the head page vs. PTE-mapped
+	 * THP), PG_anon_exclusive may be set only for the head page or for
+	 * tail pages of an anonymous folio. For now, we only expect it to be
+	 * set on tail pages for PTE-mapped THP.
+	 */
+	PG_anon_exclusive = PG_owner_2,
+
+	/*
+	 * Set if all buffer heads in the folio are mapped.
+	 * Filesystems which do not use BHs can use it for their own purpose.
+	 */
+	PG_mappedtodisk = PG_owner_2,
 
	/* Two page bits are conscripted by FS-Cache to maintain local caching
	 * state.  These bits are set on pages belonging to the netfs's inodes
	 * when those inodes are being locally cached.
@@ -121,173 +158,640 @@ enum pageflags {
	PG_fscache = PG_private_2,	/* page backed by cache */
 
	/* XEN */
+	/* Pinned in Xen as a read-only pagetable page. */
	PG_pinned = PG_owner_priv_1,
+	/* Pinned as part of domain save (see xen_mm_pin_all()). */
	PG_savepinned = PG_dirty,
+	/* Has a grant mapping of another (foreign) domain's page. */
+	PG_foreign = PG_owner_priv_1,
+	/* Remapped by swiotlb-xen. */
+	PG_xen_remapped = PG_owner_priv_1,
+
+#ifdef CONFIG_MIGRATION
+	/* movable_ops page that is isolated for migration */
+	PG_movable_ops_isolated = PG_reclaim,
+	/* this is a movable_ops page (for selected typed pages only) */
+	PG_movable_ops = PG_uptodate,
+#endif
+
+	/* Only valid for buddy pages. Used to track pages that are reported */
+	PG_reported = PG_uptodate,
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+	/* For self-hosted memmap pages */
+	PG_vmemmap_self_hosted = PG_owner_priv_1,
+#endif
+
+	/*
+	 * Flags only valid for compound pages. Stored in first tail page's
+	 * flags word. Cannot use the first 8 flags or any flag marked as
+	 * PF_ANY.
+	 */
-	/* SLOB */
-	PG_slob_free = PG_private,
+	/* At least one page in this folio has the hwpoison flag set */
+	PG_has_hwpoisoned = PG_active,
+	PG_large_rmappable = PG_workingset, /* anon or file-backed */
+	PG_partially_mapped = PG_reclaim, /* was identified to be partially mapped */
 };
 
+#define PAGEFLAGS_MASK		((1UL << NR_PAGEFLAGS) - 1)
+
 #ifndef __GENERATING_BOUNDS_H
 
+#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
+DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key);
+
 /*
- * Macros to create function definitions for page flags
+ * Return the real head page struct iff the @page is a fake head page, otherwise
+ * return the @page itself. See Documentation/mm/vmemmap_dedup.rst.
  */
-#define TESTPAGEFLAG(uname, lname) \
-static inline int Page##uname(const struct page *page) \
-	{ return test_bit(PG_##lname, &page->flags); }
+static __always_inline const struct page *page_fixed_fake_head(const struct page *page)
+{
+	if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key))
+		return page;
+
+	/*
+	 * Only addresses aligned to PAGE_SIZE of struct page may be fake head
+	 * struct pages. The alignment check aims to avoid accessing the fields
+	 * (e.g. compound_head) of the @page[1]. It can avoid touching a
+	 * (possibly) cold cacheline in some cases.
+	 */
+	if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) &&
+	    test_bit(PG_head, &page->flags.f)) {
+		/*
+		 * We can safely access the field of the @page[1] with PG_head
+		 * because the @page is a compound page composed of at least
+		 * two contiguous pages.
+		 */
+		unsigned long head = READ_ONCE(page[1].compound_head);
+
+		if (likely(head & 1))
+			return (const struct page *)(head - 1);
+	}
+	return page;
+}
+
+static __always_inline bool page_count_writable(const struct page *page, int u)
+{
+	if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key))
+		return true;
+
+	/*
+	 * The refcount check is ordered before the fake-head check to prevent
+	 * the following race:
+	 * CPU 1 (HVO)			CPU 2 (speculative PFN walker)
+	 *
+	 * page_ref_freeze()
+	 * synchronize_rcu()
+	 *				rcu_read_lock()
+	 *				page_is_fake_head() is false
+	 * vmemmap_remap_pte()
+	 * XXX: struct page[] becomes r/o
+	 *
+	 * page_ref_unfreeze()
+	 *				page_ref_count() is not zero
+	 *
+	 *				atomic_add_unless(&page->_refcount)
+	 *				XXX: try to modify r/o struct page[]
+	 *
+	 * The refcount check also prevents modification attempts to other (r/o)
+	 * tail pages that are not fake heads.
+	 */
+	if (atomic_read_acquire(&page->_refcount) == u)
+		return false;
+
+	return page_fixed_fake_head(page) == page;
+}
+#else
+static inline const struct page *page_fixed_fake_head(const struct page *page)
+{
+	return page;
+}
 
-#define SETPAGEFLAG(uname, lname) \
-static inline void SetPage##uname(struct page *page) \
-	{ set_bit(PG_##lname, &page->flags); }
+static inline bool page_count_writable(const struct page *page, int u)
+{
+	return true;
+}
+#endif
+
+static __always_inline int page_is_fake_head(const struct page *page)
+{
+	return page_fixed_fake_head(page) != page;
+}
+
+static __always_inline unsigned long _compound_head(const struct page *page)
+{
+	unsigned long head = READ_ONCE(page->compound_head);
+
+	if (unlikely(head & 1))
+		return head - 1;
+	return (unsigned long)page_fixed_fake_head(page);
+}
+
+#define compound_head(page)	((typeof(page))_compound_head(page))
+
+/**
+ * page_folio - Converts from page to folio.
+ * @p: The page.
+ *
+ * Every page is part of a folio. This function cannot be called on a
+ * NULL pointer.
+ *
+ * Context: No reference, nor lock is required on @page. If the caller
+ * does not hold a reference, this call may race with a folio split, so
+ * it should re-check the folio still contains this page after gaining
+ * a reference on the folio.
+ * Return: The folio which contains this page.
+ */
+#define page_folio(p) (_Generic((p), \
+	const struct page *: (const struct folio *)_compound_head(p), \
+	struct page *: (struct folio *)_compound_head(p)))
 
-#define CLEARPAGEFLAG(uname, lname) \
-static inline void ClearPage##uname(struct page *page) \
-	{ clear_bit(PG_##lname, &page->flags); }
+/**
+ * folio_page - Return a page from a folio.
+ * @folio: The folio.
+ * @n: The page number to return.
+ *
+ * @n is relative to the start of the folio. This function does not
+ * check that the page number lies within @folio; the caller is presumed
+ * to have a reference to the page.
+ */
+#define folio_page(folio, n)	(&(folio)->page + (n))
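
As a reading aid for the hunk above: a minimal user-space sketch of the pointer tagging that set_compound_head() and _compound_head() rely on. The toy_page type and names are illustrative only, not kernel API; only the encoding mirrors the real code (tail pages store the head's address plus one, so bit 0 doubles as the "this is a tail page" marker).

#include <assert.h>

struct toy_page {
	unsigned long compound_head;	/* 0, or head address | 1 */
};

static struct toy_page *toy_compound_head(struct toy_page *page)
{
	unsigned long head = page->compound_head;

	if (head & 1)			/* tail page: decode the tagged pointer */
		return (struct toy_page *)(head - 1);
	return page;			/* head page or order-0 page */
}

int main(void)
{
	struct toy_page pages[4] = { 0 };

	/* mimic set_compound_head(): all tails point at pages[0] */
	for (int i = 1; i < 4; i++)
		pages[i].compound_head = (unsigned long)&pages[0] + 1;

	assert(toy_compound_head(&pages[2]) == &pages[0]);
	assert(toy_compound_head(&pages[0]) == &pages[0]);
	return 0;
}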
 
-#define __SETPAGEFLAG(uname, lname) \
-static inline void __SetPage##uname(struct page *page) \
-	{ __set_bit(PG_##lname, &page->flags); }
+static __always_inline int PageTail(const struct page *page)
+{
+	return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page);
+}
 
-#define __CLEARPAGEFLAG(uname, lname) \
-static inline void __ClearPage##uname(struct page *page) \
-	{ __clear_bit(PG_##lname, &page->flags); }
+static __always_inline int PageCompound(const struct page *page)
+{
+	return test_bit(PG_head, &page->flags.f) ||
+	       READ_ONCE(page->compound_head) & 1;
+}
 
-#define TESTSETFLAG(uname, lname) \
-static inline int TestSetPage##uname(struct page *page) \
-	{ return test_and_set_bit(PG_##lname, &page->flags); }
+#define PAGE_POISON_PATTERN	-1l
+static inline int PagePoisoned(const struct page *page)
+{
+	return READ_ONCE(page->flags.f) == PAGE_POISON_PATTERN;
+}
 
-#define TESTCLEARFLAG(uname, lname) \
-static inline int TestClearPage##uname(struct page *page) \
-	{ return test_and_clear_bit(PG_##lname, &page->flags); }
+#ifdef CONFIG_DEBUG_VM
+void page_init_poison(struct page *page, size_t size);
+#else
+static inline void page_init_poison(struct page *page, size_t size)
+{
+}
+#endif
 
-#define __TESTCLEARFLAG(uname, lname) \
-static inline int __TestClearPage##uname(struct page *page) \
-	{ return __test_and_clear_bit(PG_##lname, &page->flags); }
+static const unsigned long *const_folio_flags(const struct folio *folio,
+		unsigned n)
+{
+	const struct page *page = &folio->page;
 
-#define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
-	SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname)
+	VM_BUG_ON_PGFLAGS(page->compound_head & 1, page);
+	VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page);
+	return &page[n].flags.f;
+}
 
-#define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
-	__SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname)
+static unsigned long *folio_flags(struct folio *folio, unsigned n)
+{
+	struct page *page = &folio->page;
 
-#define PAGEFLAG_FALSE(uname) \
-static inline int Page##uname(const struct page *page) \
-	{ return 0; }
+	VM_BUG_ON_PGFLAGS(page->compound_head & 1, page);
+	VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page);
+	return &page[n].flags.f;
+}
 
-#define TESTSCFLAG(uname, lname) \
-	TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
+/*
+ * Page flags policies wrt compound pages
+ *
+ * PF_POISONED_CHECK
+ *	check if this struct page is poisoned/uninitialized
+ *
+ * PF_ANY:
+ *	the page flag is relevant for small, head and tail pages.
+ *
+ * PF_HEAD:
+ *	for compound pages, all operations related to the page flag are
+ *	applied to the head page.
+ *
+ * PF_NO_TAIL:
+ *	modifications of the page flag must be done on small or head pages,
+ *	checks can be done on tail pages too.
+ *
+ * PF_NO_COMPOUND:
+ *	the page flag is not relevant for compound pages.
+ *
+ * PF_SECOND:
+ *	the page flag is stored in the first tail page.
+ */
+#define PF_POISONED_CHECK(page) ({ \
+		VM_BUG_ON_PGFLAGS(PagePoisoned(page), page); \
+		page; })
+#define PF_ANY(page, enforce)	PF_POISONED_CHECK(page)
+#define PF_HEAD(page, enforce)	PF_POISONED_CHECK(compound_head(page))
+#define PF_NO_TAIL(page, enforce) ({ \
+		VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \
+		PF_POISONED_CHECK(compound_head(page)); })
+#define PF_NO_COMPOUND(page, enforce) ({ \
+		VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \
+		PF_POISONED_CHECK(page); })
+#define PF_SECOND(page, enforce) ({ \
+		VM_BUG_ON_PGFLAGS(!PageHead(page), page); \
+		PF_POISONED_CHECK(&page[1]); })
+
+/* Which page is the flag stored in */
+#define FOLIO_PF_ANY		0
+#define FOLIO_PF_HEAD		0
+#define FOLIO_PF_NO_TAIL	0
+#define FOLIO_PF_NO_COMPOUND	0
+#define FOLIO_PF_SECOND		1
+
+#define FOLIO_HEAD_PAGE		0
+#define FOLIO_SECOND_PAGE	1
 
-#define SETPAGEFLAG_NOOP(uname) \
+/*
+ * Macros to create function definitions for page flags
+ */
+#define FOLIO_TEST_FLAG(name, page) \
+static __always_inline bool folio_test_##name(const struct folio *folio) \
+{ return test_bit(PG_##name, const_folio_flags(folio, page)); }
+
+#define FOLIO_SET_FLAG(name, page) \
+static __always_inline void folio_set_##name(struct folio *folio) \
+{ set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_CLEAR_FLAG(name, page) \
+static __always_inline void folio_clear_##name(struct folio *folio) \
+{ clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define __FOLIO_SET_FLAG(name, page) \
+static __always_inline void __folio_set_##name(struct folio *folio) \
+{ __set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define __FOLIO_CLEAR_FLAG(name, page) \
+static __always_inline void __folio_clear_##name(struct folio *folio) \
+{ __clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_TEST_SET_FLAG(name, page) \
+static __always_inline bool folio_test_set_##name(struct folio *folio) \
+{ return test_and_set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_TEST_CLEAR_FLAG(name, page) \
+static __always_inline bool folio_test_clear_##name(struct folio *folio) \
+{ return test_and_clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_FLAG(name, page) \
+FOLIO_TEST_FLAG(name, page) \
+FOLIO_SET_FLAG(name, page) \
+FOLIO_CLEAR_FLAG(name, page)
+
+#define TESTPAGEFLAG(uname, lname, policy) \
+FOLIO_TEST_FLAG(lname, FOLIO_##policy) \
+static __always_inline int Page##uname(const struct page *page) \
+{ return test_bit(PG_##lname, &policy(page, 0)->flags.f); }
+
+#define SETPAGEFLAG(uname, lname, policy) \
+FOLIO_SET_FLAG(lname, FOLIO_##policy) \
+static __always_inline void SetPage##uname(struct page *page) \
+{ set_bit(PG_##lname, &policy(page, 1)->flags.f); }
+
+#define CLEARPAGEFLAG(uname, lname, policy) \
+FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \
+static __always_inline void ClearPage##uname(struct page *page) \
+{ clear_bit(PG_##lname, &policy(page, 1)->flags.f); }
+
+#define __SETPAGEFLAG(uname, lname, policy) \
+__FOLIO_SET_FLAG(lname, FOLIO_##policy) \
+static __always_inline void __SetPage##uname(struct page *page) \
+{ __set_bit(PG_##lname, &policy(page, 1)->flags.f); }
+
+#define __CLEARPAGEFLAG(uname, lname, policy) \
+__FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \
+static __always_inline void __ClearPage##uname(struct page *page) \
+{ __clear_bit(PG_##lname, &policy(page, 1)->flags.f); }
+
+#define TESTSETFLAG(uname, lname, policy) \
+FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy) \
+static __always_inline int TestSetPage##uname(struct page *page) \
+{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags.f); }
+
+#define TESTCLEARFLAG(uname, lname, policy) \
+FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy) \
+static __always_inline int TestClearPage##uname(struct page *page) \
+{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags.f); }
+
+#define PAGEFLAG(uname, lname, policy) \
+	TESTPAGEFLAG(uname, lname, policy) \
+	SETPAGEFLAG(uname, lname, policy) \
+	CLEARPAGEFLAG(uname, lname, policy)
+
+#define __PAGEFLAG(uname, lname, policy) \
+	TESTPAGEFLAG(uname, lname, policy) \
+	__SETPAGEFLAG(uname, lname, policy) \
+	__CLEARPAGEFLAG(uname, lname, policy)
+
+#define TESTSCFLAG(uname, lname, policy) \
+	TESTSETFLAG(uname, lname, policy) \
+	TESTCLEARFLAG(uname, lname, policy)
+
+#define FOLIO_TEST_FLAG_FALSE(name) \
+static inline bool folio_test_##name(const struct folio *folio) \
+{ return false; }
+#define FOLIO_SET_FLAG_NOOP(name) \
+static inline void folio_set_##name(struct folio *folio) { }
+#define FOLIO_CLEAR_FLAG_NOOP(name) \
+static inline void folio_clear_##name(struct folio *folio) { }
+#define __FOLIO_SET_FLAG_NOOP(name) \
+static inline void __folio_set_##name(struct folio *folio) { }
+#define __FOLIO_CLEAR_FLAG_NOOP(name) \
+static inline void __folio_clear_##name(struct folio *folio) { }
+#define FOLIO_TEST_SET_FLAG_FALSE(name) \
+static inline bool folio_test_set_##name(struct folio *folio) \
+{ return false; }
+#define FOLIO_TEST_CLEAR_FLAG_FALSE(name) \
+static inline bool folio_test_clear_##name(struct folio *folio) \
+{ return false; }
+
+#define FOLIO_FLAG_FALSE(name) \
+FOLIO_TEST_FLAG_FALSE(name) \
+FOLIO_SET_FLAG_NOOP(name) \
+FOLIO_CLEAR_FLAG_NOOP(name)
+
+#define TESTPAGEFLAG_FALSE(uname, lname) \
+FOLIO_TEST_FLAG_FALSE(lname) \
+static inline int Page##uname(const struct page *page) { return 0; }
+
+#define SETPAGEFLAG_NOOP(uname, lname) \
+FOLIO_SET_FLAG_NOOP(lname) \
 static inline void SetPage##uname(struct page *page) { }
 
-#define CLEARPAGEFLAG_NOOP(uname) \
+#define CLEARPAGEFLAG_NOOP(uname, lname) \
+FOLIO_CLEAR_FLAG_NOOP(lname) \
 static inline void ClearPage##uname(struct page *page) { }
 
-#define __CLEARPAGEFLAG_NOOP(uname) \
+#define __CLEARPAGEFLAG_NOOP(uname, lname) \
+__FOLIO_CLEAR_FLAG_NOOP(lname) \
 static inline void __ClearPage##uname(struct page *page) { }
 
-#define TESTCLEARFLAG_FALSE(uname) \
-static inline int TestClearPage##uname(struct page *page) { return 0; }
-
-#define __TESTCLEARFLAG_FALSE(uname) \
-static inline int __TestClearPage##uname(struct page *page) { return 0; }
-
-struct page;	/* forward declaration */
+#define TESTSETFLAG_FALSE(uname, lname) \
+FOLIO_TEST_SET_FLAG_FALSE(lname) \
+static inline int TestSetPage##uname(struct page *page) { return 0; }
 
-TESTPAGEFLAG(Locked, locked)
-PAGEFLAG(Error, error) TESTCLEARFLAG(Error, error)
-PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
-PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
-PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
-PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
-	TESTCLEARFLAG(Active, active)
-__PAGEFLAG(Slab, slab)
-PAGEFLAG(Checked, checked)		/* Used by some filesystems */
-PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned)	/* Xen */
-PAGEFLAG(SavePinned, savepinned);			/* Xen */
-PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
-PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
+#define TESTCLEARFLAG_FALSE(uname, lname) \
+FOLIO_TEST_CLEAR_FLAG_FALSE(lname) \
+static inline int TestClearPage##uname(struct page *page) { return 0; }
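
To make the token-pasting machinery above concrete, this is roughly what one instantiation, PAGEFLAG(Workingset, workingset, PF_HEAD), expands to once the preprocessor has run (whitespace rearranged for reading; FOLIO_PF_HEAD is 0, so the folio helpers operate on the head page's flags word — verify with "gcc -E" against the real tree):

/* from TESTPAGEFLAG(Workingset, workingset, PF_HEAD) */
static __always_inline bool folio_test_workingset(const struct folio *folio)
{ return test_bit(PG_workingset, const_folio_flags(folio, FOLIO_PF_HEAD)); }
static __always_inline int PageWorkingset(const struct page *page)
{ return test_bit(PG_workingset, &PF_HEAD(page, 0)->flags.f); }

/* from SETPAGEFLAG(Workingset, workingset, PF_HEAD) */
static __always_inline void folio_set_workingset(struct folio *folio)
{ set_bit(PG_workingset, folio_flags(folio, FOLIO_PF_HEAD)); }
static __always_inline void SetPageWorkingset(struct page *page)
{ set_bit(PG_workingset, &PF_HEAD(page, 1)->flags.f); }

/* from CLEARPAGEFLAG(Workingset, workingset, PF_HEAD) */
static __always_inline void folio_clear_workingset(struct folio *folio)
{ clear_bit(PG_workingset, folio_flags(folio, FOLIO_PF_HEAD)); }
static __always_inline void ClearPageWorkingset(struct page *page)
{ clear_bit(PG_workingset, &PF_HEAD(page, 1)->flags.f); }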
 
-__PAGEFLAG(SlobFree, slob_free)
+#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \
+	SETPAGEFLAG_NOOP(uname, lname) CLEARPAGEFLAG_NOOP(uname, lname)
+
+#define TESTSCFLAG_FALSE(uname, lname) \
+	TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname)
+
+__PAGEFLAG(Locked, locked, PF_NO_TAIL)
+FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE)
+FOLIO_FLAG(referenced, FOLIO_HEAD_PAGE)
+	FOLIO_TEST_CLEAR_FLAG(referenced, FOLIO_HEAD_PAGE)
+	__FOLIO_SET_FLAG(referenced, FOLIO_HEAD_PAGE)
+PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD)
+	__CLEARPAGEFLAG(Dirty, dirty, PF_HEAD)
+PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
+	TESTCLEARFLAG(LRU, lru, PF_HEAD)
+FOLIO_FLAG(active, FOLIO_HEAD_PAGE)
+	__FOLIO_CLEAR_FLAG(active, FOLIO_HEAD_PAGE)
+	FOLIO_TEST_CLEAR_FLAG(active, FOLIO_HEAD_PAGE)
+PAGEFLAG(Workingset, workingset, PF_HEAD)
+	TESTCLEARFLAG(Workingset, workingset, PF_HEAD)
+PAGEFLAG(Checked, checked, PF_NO_COMPOUND)	/* Used by some filesystems */
+
+/* Xen */
+PAGEFLAG(Pinned, pinned, PF_NO_COMPOUND)
+	TESTSCFLAG(Pinned, pinned, PF_NO_COMPOUND)
+PAGEFLAG(SavePinned, savepinned, PF_NO_COMPOUND);
+PAGEFLAG(Foreign, foreign, PF_NO_COMPOUND);
+PAGEFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND)
+	TESTCLEARFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND)
 
+PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
+	__CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
+	__SETPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
+FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE)
+	__FOLIO_CLEAR_FLAG(swapbacked, FOLIO_HEAD_PAGE)
+	__FOLIO_SET_FLAG(swapbacked, FOLIO_HEAD_PAGE)
 
 /*
  * Private page markings that may be used by the filesystem that owns the page
  * for its own purposes.
- * - PG_private and PG_private_2 cause releasepage() and co to be invoked
+ * - PG_private and PG_private_2 cause release_folio() and co to be invoked
  */
-PAGEFLAG(Private, private) __SETPAGEFLAG(Private, private)
-	__CLEARPAGEFLAG(Private, private)
-PAGEFLAG(Private2, private_2) TESTSCFLAG(Private2, private_2)
-PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1)
+PAGEFLAG(Private, private, PF_ANY)
+FOLIO_FLAG(private_2, FOLIO_HEAD_PAGE)
+
+/* owner_2 can be set on tail pages for anon memory */
+FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE)
 
 /*
  * Only test-and-set exist for PG_writeback. The unconditional operators are
  * risky: they bypass page accounting.
  */
-TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
-PAGEFLAG(MappedToDisk, mappedtodisk)
+TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL)
+	TESTSCFLAG(Writeback, writeback, PF_NO_TAIL)
+FOLIO_FLAG(mappedtodisk, FOLIO_HEAD_PAGE)
 
-/* PG_readahead is only used for file reads; PG_reclaim is only for writes */
-PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim)
-PAGEFLAG(Readahead, reclaim)		/* Reminder to do async read-ahead */
+/* PG_readahead is only used for reads; PG_reclaim is only for writes */
+PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL)
+	TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL)
+FOLIO_FLAG(readahead, FOLIO_HEAD_PAGE)
+	FOLIO_TEST_CLEAR_FLAG(readahead, FOLIO_HEAD_PAGE)
+
+FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE)
+	FOLIO_TEST_CLEAR_FLAG(dropbehind, FOLIO_HEAD_PAGE)
+	__FOLIO_SET_FLAG(dropbehind, FOLIO_HEAD_PAGE)
 
 #ifdef CONFIG_HIGHMEM
 /*
  * Must use a macro here due to header dependency issues. page_zone() is not
  * available at this point.
  */
-#define PageHighMem(__p) is_highmem(page_zone(__p))
+#define PageHighMem(__p) is_highmem_idx(page_zonenum(__p))
+#define folio_test_highmem(__f)	is_highmem_idx(folio_zonenum(__f))
 #else
-PAGEFLAG_FALSE(HighMem)
+PAGEFLAG_FALSE(HighMem, highmem)
 #endif
+#define PhysHighMem(__p) (PageHighMem(phys_to_page(__p)))
 
-#ifdef CONFIG_SWAP
-PAGEFLAG(SwapCache, swapcache)
+/* Does kmap_local_folio() only allow access to one page of the folio? */
+#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP
+#define folio_test_partial_kmap(f)	true
 #else
-PAGEFLAG_FALSE(SwapCache)
-	SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache)
+#define folio_test_partial_kmap(f)	folio_test_highmem(f)
 #endif
 
-PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
-	TESTCLEARFLAG(Unevictable, unevictable)
+#ifdef CONFIG_SWAP
+static __always_inline bool folio_test_swapcache(const struct folio *folio)
+{
+	return folio_test_swapbacked(folio) &&
+		test_bit(PG_swapcache, const_folio_flags(folio, 0));
+}
 
-#ifdef CONFIG_MMU
-PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
-	TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked)
+FOLIO_SET_FLAG(swapcache, FOLIO_HEAD_PAGE)
+FOLIO_CLEAR_FLAG(swapcache, FOLIO_HEAD_PAGE)
 #else
-PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked)
-	TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
+FOLIO_FLAG_FALSE(swapcache)
 #endif
 
-#ifdef CONFIG_ARCH_USES_PG_UNCACHED
-PAGEFLAG(Uncached, uncached)
+FOLIO_FLAG(unevictable, FOLIO_HEAD_PAGE)
+	__FOLIO_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE)
+	FOLIO_TEST_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE)
+
+#ifdef CONFIG_MMU
+FOLIO_FLAG(mlocked, FOLIO_HEAD_PAGE)
+	__FOLIO_CLEAR_FLAG(mlocked, FOLIO_HEAD_PAGE)
+	FOLIO_TEST_CLEAR_FLAG(mlocked, FOLIO_HEAD_PAGE)
+	FOLIO_TEST_SET_FLAG(mlocked, FOLIO_HEAD_PAGE)
 #else
-PAGEFLAG_FALSE(Uncached)
+FOLIO_FLAG_FALSE(mlocked)
+	__FOLIO_CLEAR_FLAG_NOOP(mlocked)
+	FOLIO_TEST_CLEAR_FLAG_FALSE(mlocked)
+	FOLIO_TEST_SET_FLAG_FALSE(mlocked)
 #endif
 
 #ifdef CONFIG_MEMORY_FAILURE
-PAGEFLAG(HWPoison, hwpoison)
-TESTSCFLAG(HWPoison, hwpoison)
+PAGEFLAG(HWPoison, hwpoison, PF_ANY)
+TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
 #define __PG_HWPOISON (1UL << PG_hwpoison)
 #else
-PAGEFLAG_FALSE(HWPoison)
+PAGEFLAG_FALSE(HWPoison, hwpoison)
 #define __PG_HWPOISON 0
 #endif
 
-u64 stable_page_flags(struct page *page);
+#ifdef CONFIG_PAGE_IDLE_FLAG
+#ifdef CONFIG_64BIT
+FOLIO_TEST_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_SET_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_TEST_CLEAR_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_FLAG(idle, FOLIO_HEAD_PAGE)
+#endif
+/* See page_idle.h for !64BIT workaround */
+#else /* !CONFIG_PAGE_IDLE_FLAG */
+FOLIO_FLAG_FALSE(young)
+FOLIO_TEST_CLEAR_FLAG_FALSE(young)
+FOLIO_FLAG_FALSE(idle)
+#endif
+
+/*
+ * PageReported() is used to track reported free pages within the Buddy
+ * allocator. We can use the non-atomic version of the test and set
+ * operations as both should be shielded with the zone lock to prevent
+ * any possible races on the setting or clearing of the bit.
+ */
+__PAGEFLAG(Reported, reported, PF_NO_COMPOUND)
 
-static inline int PageUptodate(struct page *page)
+#ifdef CONFIG_MEMORY_HOTPLUG
+PAGEFLAG(VmemmapSelfHosted, vmemmap_self_hosted, PF_ANY)
+#else
+PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
+#endif
+
+/*
+ * On an anonymous folio mapped into a user virtual memory area,
+ * folio->mapping points to its anon_vma, not to a struct address_space;
+ * with the FOLIO_MAPPING_ANON bit set to distinguish it.  See rmap.h.
+ *
+ * On an anonymous folio in a VM_MERGEABLE area, if CONFIG_KSM is enabled,
+ * the FOLIO_MAPPING_ANON_KSM bit may be set along with the FOLIO_MAPPING_ANON
+ * bit; and then folio->mapping points, not to an anon_vma, but to a private
+ * structure which KSM associates with that merged folio.  See ksm.h.
+ *
+ * Please note that, confusingly, "folio_mapping" refers to the inode
+ * address_space which maps the folio from disk; whereas "folio_mapped"
+ * refers to user virtual address space into which the folio is mapped.
+ *
+ * For slab pages, since slab reuses the bits in struct page to store its
+ * internal states, the folio->mapping does not exist as such, nor do
+ * these flags below.  So in order to avoid testing non-existent bits,
+ * please make sure that folio_test_slab(folio) actually evaluates to
+ * false before calling the following functions (e.g., folio_test_anon).
+ * See mm/slab.h.
+ */
+#define FOLIO_MAPPING_ANON	0x1
+#define FOLIO_MAPPING_ANON_KSM	0x2
+#define FOLIO_MAPPING_KSM	(FOLIO_MAPPING_ANON | FOLIO_MAPPING_ANON_KSM)
+#define FOLIO_MAPPING_FLAGS	(FOLIO_MAPPING_ANON | FOLIO_MAPPING_ANON_KSM)
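
An aside on the two tag bits above: because anon_vma and address_space pointers are at least 4-byte aligned, bits 0-1 of folio->mapping are free to carry type information. A self-contained user-space sketch of the same idiom (the TOY_* names are hypothetical, not kernel identifiers):

#include <assert.h>

#define TOY_ANON	0x1ul
#define TOY_ANON_KSM	0x2ul
#define TOY_FLAGS	(TOY_ANON | TOY_ANON_KSM)

int main(void)
{
	static struct { long dummy; } anon_vma;	/* aligned stand-in object */
	void *mapping = (void *)((unsigned long)&anon_vma | TOY_ANON);

	/* test the tag bits, then strip them to recover the real pointer */
	assert(((unsigned long)mapping & TOY_FLAGS) == TOY_ANON);
	assert((void *)((unsigned long)mapping & ~TOY_FLAGS) == (void *)&anon_vma);
	return 0;
}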
+
+static __always_inline bool folio_test_anon(const struct folio *folio)
 {
-	int ret = test_bit(PG_uptodate, &(page)->flags);
+	return ((unsigned long)folio->mapping & FOLIO_MAPPING_ANON) != 0;
+}
+
+static __always_inline bool PageAnonNotKsm(const struct page *page)
+{
+	unsigned long flags = (unsigned long)page_folio(page)->mapping;
+
+	return (flags & FOLIO_MAPPING_FLAGS) == FOLIO_MAPPING_ANON;
+}
+
+static __always_inline bool PageAnon(const struct page *page)
+{
+	return folio_test_anon(page_folio(page));
+}
+
+#ifdef CONFIG_KSM
+/*
+ * A KSM page is one of those write-protected "shared pages" or "merged pages"
+ * which KSM maps into multiple mms, wherever identical anonymous page content
+ * is found in VM_MERGEABLE vmas.  It's a PageAnon page, pointing not to any
+ * anon_vma, but to that page's node of the stable tree.
+ */
+static __always_inline bool folio_test_ksm(const struct folio *folio)
+{
+	return ((unsigned long)folio->mapping & FOLIO_MAPPING_FLAGS) ==
+			FOLIO_MAPPING_KSM;
+}
+#else
+FOLIO_TEST_FLAG_FALSE(ksm)
+#endif
+
+u64 stable_page_flags(const struct page *page);
+
+/**
+ * folio_xor_flags_has_waiters - Change some folio flags.
+ * @folio: The folio.
+ * @mask: Bits set in this word will be changed.
+ *
+ * This must only be used for flags which are changed with the folio
+ * lock held.  For example, it is unsafe to use for PG_dirty as that
+ * can be set without the folio lock held.  It can also only be used on
+ * flags which are in the range 0-6 as some of the implementations only
+ * affect those bits.
+ *
+ * Return: Whether there are tasks waiting on the folio.
+ */
+static inline bool folio_xor_flags_has_waiters(struct folio *folio,
+		unsigned long mask)
+{
+	return xor_unlock_is_negative_byte(mask, folio_flags(folio, 0));
+}
+
+/**
+ * folio_test_uptodate - Is this folio up to date?
+ * @folio: The folio.
+ *
+ * The uptodate flag is set on a folio when every byte in the folio is
+ * at least as new as the corresponding bytes on storage.  Anonymous
+ * and CoW folios are always uptodate.  If the folio is not uptodate,
+ * some of the bytes in it may be; see the is_partially_uptodate()
+ * address_space operation.
+ */
+static inline bool folio_test_uptodate(const struct folio *folio)
+{
+	bool ret = test_bit(PG_uptodate, const_folio_flags(folio, 0));
	/*
-	 * Must ensure that the data we read out of the page is loaded
-	 * _after_ we've loaded page->flags to check for PageUptodate.
-	 * We can skip the barrier if the page is not uptodate, because
+	 * Must ensure that the data we read out of the folio is loaded
+	 * _after_ we've loaded folio->flags to check the uptodate bit.
+	 * We can skip the barrier if the folio is not uptodate, because
	 * we wouldn't be reading anything from it.
	 *
-	 * See SetPageUptodate() for the other side of the story.
+	 * See folio_mark_uptodate() for the other side of the story.
	 */
	if (ret)
		smp_rmb();
@@ -295,236 +799,445 @@ static inline int PageUptodate(struct page *page)
	return ret;
 }
 
-static inline void __SetPageUptodate(struct page *page)
+static inline bool PageUptodate(const struct page *page)
+{
+	return folio_test_uptodate(page_folio(page));
+}
+
+static __always_inline void __folio_mark_uptodate(struct folio *folio)
 {
	smp_wmb();
-	__set_bit(PG_uptodate, &(page)->flags);
+	__set_bit(PG_uptodate, folio_flags(folio, 0));
 }
 
-static inline void SetPageUptodate(struct page *page)
+static __always_inline void folio_mark_uptodate(struct folio *folio)
 {
	/*
	 * Memory barrier must be issued before setting the PG_uptodate bit,
-	 * so that all previous stores issued in order to bring the page
-	 * uptodate are actually visible before PageUptodate becomes true.
+	 * so that all previous stores issued in order to bring the folio
+	 * uptodate are actually visible before folio_test_uptodate becomes true.
	 */
	smp_wmb();
-	set_bit(PG_uptodate, &(page)->flags);
+	set_bit(PG_uptodate, folio_flags(folio, 0));
 }
 
-CLEARPAGEFLAG(Uptodate, uptodate)
+static __always_inline void __SetPageUptodate(struct page *page)
+{
+	__folio_mark_uptodate((struct folio *)page);
+}
 
-extern void cancel_dirty_page(struct page *page, unsigned int account_size);
+static __always_inline void SetPageUptodate(struct page *page)
+{
+	folio_mark_uptodate((struct folio *)page);
+}
+
+CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL)
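
The smp_wmb()/smp_rmb() pairing above is the classic publish pattern: finish writing the contents, then set the flag; see the flag, then read the contents. A self-contained C11 sketch of the same ordering guarantee, using release/acquire in place of the kernel barriers (user-space illustration, not kernel code):

#include <stdatomic.h>

static char payload[64];		/* stands in for the folio contents */
static atomic_bool uptodate;

void writer(void)			/* cf. folio_mark_uptodate() */
{
	payload[0] = 42;		/* bring the contents up to date ... */
	/* ... then publish; earlier stores become visible first */
	atomic_store_explicit(&uptodate, 1, memory_order_release);
}

int reader(void)			/* cf. folio_test_uptodate() */
{
	if (atomic_load_explicit(&uptodate, memory_order_acquire))
		return payload[0];	/* guaranteed to observe the store */
	return -1;			/* not uptodate: read nothing */
}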
+
+void __folio_start_writeback(struct folio *folio, bool keep_write);
+void set_page_writeback(struct page *page);
 
-int test_clear_page_writeback(struct page *page);
-int test_set_page_writeback(struct page *page);
+#define folio_start_writeback(folio) \
+	__folio_start_writeback(folio, false)
 
-static inline void set_page_writeback(struct page *page)
+static __always_inline bool folio_test_head(const struct folio *folio)
 {
-	test_set_page_writeback(page);
+	return test_bit(PG_head, const_folio_flags(folio, FOLIO_PF_ANY));
 }
 
-#ifdef CONFIG_PAGEFLAGS_EXTENDED
-/*
- * System with lots of page flags available. This allows separate
- * flags for PageHead() and PageTail() checks of compound pages so that bit
- * tests can be used in performance sensitive paths. PageCompound is
- * generally not used in hot code paths.
+static __always_inline int PageHead(const struct page *page)
+{
+	PF_POISONED_CHECK(page);
+	return test_bit(PG_head, &page->flags.f) && !page_is_fake_head(page);
+}
+
+__SETPAGEFLAG(Head, head, PF_ANY)
+__CLEARPAGEFLAG(Head, head, PF_ANY)
+CLEARPAGEFLAG(Head, head, PF_ANY)
+
+/**
+ * folio_test_large() - Does this folio contain more than one page?
+ * @folio: The folio to test.
+ *
+ * Return: True if the folio is larger than one page.
 */
-__PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head)
-__PAGEFLAG(Tail, tail)
+static inline bool folio_test_large(const struct folio *folio)
+{
+	return folio_test_head(folio);
+}
 
-static inline int PageCompound(struct page *page)
+static __always_inline void set_compound_head(struct page *page, struct page *head)
 {
-	return page->flags & ((1L << PG_head) | (1L << PG_tail));
+	WRITE_ONCE(page->compound_head, (unsigned long)head + 1);
+}
+static __always_inline void clear_compound_head(struct page *page)
+{
+	WRITE_ONCE(page->compound_head, 0);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline void ClearPageCompound(struct page *page)
 {
	BUG_ON(!PageHead(page));
	ClearPageHead(page);
 }
-#endif
+FOLIO_FLAG(large_rmappable, FOLIO_SECOND_PAGE)
+FOLIO_FLAG(partially_mapped, FOLIO_SECOND_PAGE)
 #else
+FOLIO_FLAG_FALSE(large_rmappable)
+FOLIO_FLAG_FALSE(partially_mapped)
+#endif
+
+#define PG_head_mask ((1UL << PG_head))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
- * Reduce page flag use as much as possible by overlapping
- * compound page flags with the flags used for page cache pages. Possible
- * because PageCompound is always set for compound pages and not for
- * pages on the LRU and/or pagecache.
+ * PageTransCompound returns true for both transparent huge pages
+ * and hugetlbfs pages, so it should only be called when it's known
+ * that hugetlbfs pages aren't involved.
  */
-TESTPAGEFLAG(Compound, compound)
-__SETPAGEFLAG(Head, compound) __CLEARPAGEFLAG(Head, compound)
+static inline int PageTransCompound(const struct page *page)
+{
+	return PageCompound(page);
+}
+#else
+TESTPAGEFLAG_FALSE(TransCompound, transcompound)
+#endif
 
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
 /*
- * PG_reclaim is used in combination with PG_compound to mark the
- * head and tail of a compound page. This saves one page flag
- * but makes it impossible to use compound pages for the page cache.
- * The PG_reclaim bit would have to be used for reclaim or readahead
- * if compound pages enter the page cache.
+ * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the
+ * compound page.
  *
- * PG_compound & PG_reclaim	=> Tail page
- * PG_compound & ~PG_reclaim	=> Head page
+ * This flag is set by hwpoison handler. Cleared by THP split or free page.
  */
-#define PG_head_mask ((1L << PG_compound))
-#define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim))
+FOLIO_FLAG(has_hwpoisoned, FOLIO_SECOND_PAGE)
+#else
+FOLIO_FLAG_FALSE(has_hwpoisoned)
+#endif
 
-static inline int PageHead(struct page *page)
+/*
+ * For pages that do not use mapcount, page_type may be used.
+ * The low 24 bits of pagetype may be used for your own purposes, as long
+ * as you are careful to not affect the top 8 bits. The low bits of
+ * pagetype will be overwritten when you clear the page_type from the page.
+ */
+enum pagetype {
+	/* 0x00-0x7f are positive numbers, ie mapcount */
+	/* Reserve 0x80-0xef for mapcount overflow. */
+	PGTY_buddy		= 0xf0,
+	PGTY_offline		= 0xf1,
+	PGTY_table		= 0xf2,
+	PGTY_guard		= 0xf3,
+	PGTY_hugetlb		= 0xf4,
+	PGTY_slab		= 0xf5,
+	PGTY_zsmalloc		= 0xf6,
+	PGTY_unaccepted		= 0xf7,
+	PGTY_large_kmalloc	= 0xf8,
+
+	PGTY_mapcount_underflow	= 0xff
+};
+
+static inline bool page_type_has_type(int page_type)
 {
-	return ((page->flags & PG_head_tail_mask) == PG_head_mask);
+	return page_type < (PGTY_mapcount_underflow << 24);
 }
 
-static inline int PageTail(struct page *page)
+/* This takes a mapcount which is one more than page->_mapcount */
+static inline bool page_mapcount_is_type(unsigned int mapcount)
 {
-	return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask);
+	return page_type_has_type(mapcount - 1);
 }
 
-static inline void __SetPageTail(struct page *page)
+static inline bool page_has_type(const struct page *page)
 {
-	page->flags |= PG_head_tail_mask;
+	return page_type_has_type(data_race(page->page_type));
 }
 
-static inline void __ClearPageTail(struct page *page)
-{
-	page->flags &= ~PG_head_tail_mask;
+#define FOLIO_TYPE_OPS(lname, fname) \
+static __always_inline bool folio_test_##fname(const struct folio *folio) \
+{ \
+	return data_race(folio->page.page_type >> 24) == PGTY_##lname; \
+} \
+static __always_inline void __folio_set_##fname(struct folio *folio) \
+{ \
+	if (folio_test_##fname(folio)) \
+		return; \
+	VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \
+			folio); \
+	folio->page.page_type = (unsigned int)PGTY_##lname << 24; \
+} \
+static __always_inline void __folio_clear_##fname(struct folio *folio) \
+{ \
+	if (folio->page.page_type == UINT_MAX) \
+		return; \
+	VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \
+	folio->page.page_type = UINT_MAX; \
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline void ClearPageCompound(struct page *page)
-{
-	BUG_ON((page->flags & PG_head_tail_mask) != (1 << PG_compound));
-	clear_bit(PG_compound, &page->flags);
+#define PAGE_TYPE_OPS(uname, lname, fname) \
+FOLIO_TYPE_OPS(lname, fname) \
+static __always_inline int Page##uname(const struct page *page) \
+{ \
+	return data_race(page->page_type >> 24) == PGTY_##lname; \
+} \
+static __always_inline void __SetPage##uname(struct page *page) \
+{ \
+	if (Page##uname(page)) \
+		return; \
+	VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \
+	page->page_type = (unsigned int)PGTY_##lname << 24; \
+} \
+static __always_inline void __ClearPage##uname(struct page *page) \
+{ \
+	if (page->page_type == UINT_MAX) \
+		return; \
+	VM_BUG_ON_PAGE(!Page##uname(page), page); \
+	page->page_type = UINT_MAX; \
 }
-#endif
-#endif /* !PAGEFLAGS_EXTENDED */
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * PageBuddy() indicates that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ */
+PAGE_TYPE_OPS(Buddy, buddy, buddy)
+
 /*
- * PageHuge() only returns true for hugetlbfs pages, but not for
- * normal or transparent huge pages.
+ * PageOffline() indicates that the page is logically offline although the
+ * containing section is online. (e.g. inflated in a balloon driver or
+ * not onlined when onlining the section).
+ * The content of these pages is effectively stale. Such pages should not
+ * be touched (read/write/dump/save) except by their owner.
+ *
+ * When a memory block gets onlined, all pages are initialized with a
+ * refcount of 1 and PageOffline(). generic_online_page() will
+ * take care of clearing PageOffline().
+ *
+ * If a driver wants to allow offlining unmovable PageOffline() pages without
+ * putting them back to the buddy, it can do so via the memory notifier by
+ * decrementing the reference count in MEM_GOING_OFFLINE and incrementing the
+ * reference count in MEM_CANCEL_OFFLINE. When offlining, the PageOffline()
+ * pages (now with a reference count of zero) are treated like free (unmanaged)
+ * pages, allowing the containing memory block to get offlined. A driver that
+ * relies on this feature is aware that re-onlining the memory block will
+ * require not giving them to the buddy via generic_online_page().
+ *
+ * Memory offlining code will not adjust the managed page count for any
+ * PageOffline() pages, treating them like they were never exposed to the
+ * buddy using generic_online_page().
  *
- * PageTransHuge() returns true for both transparent huge and
- * hugetlbfs pages, but not normal pages. PageTransHuge() can only be
- * called only in the core VM paths where hugetlbfs pages can't exist.
+ * There are drivers that mark a page PageOffline() and expect there won't be
+ * any further access to page content. PFN walkers that read content of random
+ * pages should check PageOffline() and synchronize with such drivers using
+ * page_offline_freeze()/page_offline_thaw().
  */
-static inline int PageTransHuge(struct page *page)
-{
-	VM_BUG_ON(PageTail(page));
-	return PageHead(page);
-}
+PAGE_TYPE_OPS(Offline, offline, offline)
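
A quick arithmetic illustration of page_type_has_type() above: the type lives in the top byte of the 32-bit page_type word, and the comparison is done on a signed int, so anything at or above PGTY_mapcount_underflow << 24 (including the all-ones "unused" pattern, which is -1 as an int) counts as a mapcount word rather than a type. User-space sketch (assumes the usual two's-complement int):

#include <assert.h>

int main(void)
{
	unsigned int no_type = 0xffffffffu;	/* page_type unused: -1 as int */
	unsigned int buddy = 0xf0u << 24;	/* PGTY_buddy << 24 */
	int limit = (int)(0xffu << 24);		/* PGTY_mapcount_underflow << 24 */

	assert((int)buddy < limit);		/* typed page */
	assert(!((int)no_type < limit));	/* plain mapcount word, no type */
	return 0;
}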
+
+extern void page_offline_freeze(void);
+extern void page_offline_thaw(void);
+extern void page_offline_begin(void);
+extern void page_offline_end(void);
 
 /*
- * PageTransCompound returns true for both transparent huge pages
- * and hugetlbfs pages, so it should only be called when it's known
- * that hugetlbfs pages aren't involved.
+ * Marks pages in use as page tables.
  */
-static inline int PageTransCompound(struct page *page)
-{
-	return PageCompound(page);
-}
+PAGE_TYPE_OPS(Table, table, pgtable)
 
 /*
- * PageTransTail returns true for both transparent huge pages
- * and hugetlbfs pages, so it should only be called when it's known
- * that hugetlbfs pages aren't involved.
+ * Marks guardpages used with debug_pagealloc.
  */
-static inline int PageTransTail(struct page *page)
-{
-	return PageTail(page);
-}
+PAGE_TYPE_OPS(Guard, guard, guard)
+
+PAGE_TYPE_OPS(Slab, slab, slab)
 
 #else
+#ifdef CONFIG_HUGETLB_PAGE
+FOLIO_TYPE_OPS(hugetlb, hugetlb)
 #else
+FOLIO_TEST_FLAG_FALSE(hugetlb)
+#endif
+
+PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
 
-static inline int PageTransHuge(struct page *page)
+/*
+ * Mark pages that have to be accepted before being touched for the first
+ * time.
+ *
+ * Serialized with zone lock.
+ */
+PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted)
+PAGE_TYPE_OPS(LargeKmalloc, large_kmalloc, large_kmalloc)
+
+/**
+ * PageHuge - Determine if the page belongs to hugetlbfs
+ * @page: The page to test.
+ *
+ * Context: Any context.
+ * Return: True for hugetlbfs pages, false for anon pages or pages
+ * belonging to other filesystems.
+ */
+static inline bool PageHuge(const struct page *page)
 {
-	return 0;
+	return folio_test_hugetlb(page_folio(page));
 }
 
-static inline int PageTransCompound(struct page *page)
+/*
+ * Check if a page is currently marked HWPoisoned. Note that this check is
+ * best effort only and inherently racy: there is no way to synchronize with
+ * failing hardware.
+ */
+static inline bool is_page_hwpoison(const struct page *page)
 {
-	return 0;
+	const struct folio *folio;
+
+	if (PageHWPoison(page))
+		return true;
+	folio = page_folio(page);
+	return folio_test_hugetlb(folio) && PageHWPoison(&folio->page);
 }
 
-static inline int PageTransTail(struct page *page)
+static inline bool folio_contain_hwpoisoned_page(struct folio *folio)
 {
-	return 0;
+	return folio_test_hwpoison(folio) ||
+	       (folio_test_large(folio) && folio_test_has_hwpoisoned(folio));
 }
-#endif
+
+bool is_free_buddy_page(const struct page *page);
+
+#ifdef CONFIG_MIGRATION
 /*
- * If network-based swap is enabled, sl*b must keep track of whether pages
- * were allocated from pfmemalloc reserves.
+ * This page is migratable through movable_ops (for selected typed pages
+ * only).
+ *
+ * Page migration of such pages might fail, for example, if the page is
+ * already isolated by somebody else, or if the page is about to get freed.
+ *
+ * While a subsystem might set selected typed pages that support page migration
+ * as being movable through movable_ops, it must never clear this flag.
+ *
+ * This flag is only cleared when the page is freed back to the buddy.
+ *
+ * Only selected page types support this flag (see page_movable_ops()) and
+ * the flag might be used in other contexts for other pages. Always use
+ * page_has_movable_ops() instead.
+ */
+TESTPAGEFLAG(MovableOps, movable_ops, PF_NO_TAIL);
+SETPAGEFLAG(MovableOps, movable_ops, PF_NO_TAIL);
+/*
+ * A movable_ops page has this flag set while it is isolated for migration.
+ * This flag primarily protects against concurrent migration attempts.
+ *
+ * Once migration ended (success or failure), the flag is cleared. The
+ * flag is managed by the migration core.
+ */
+PAGEFLAG(MovableOpsIsolated, movable_ops_isolated, PF_NO_TAIL);
+#else /* !CONFIG_MIGRATION */
+TESTPAGEFLAG_FALSE(MovableOps, movable_ops);
+SETPAGEFLAG_NOOP(MovableOps, movable_ops);
+PAGEFLAG_FALSE(MovableOpsIsolated, movable_ops_isolated);
+#endif /* CONFIG_MIGRATION */
+
+/**
+ * page_has_movable_ops - test for a movable_ops page
+ * @page: The page to test.
+ *
+ * Test whether this is a movable_ops page. Such pages will stay that
+ * way until freed.
+ *
+ * Returns true if this is a movable_ops page, otherwise false.
+ */
-static inline int PageSlabPfmemalloc(struct page *page)
+static inline bool page_has_movable_ops(const struct page *page)
+{
+	return PageMovableOps(page) &&
+	       (PageOffline(page) || PageZsmalloc(page));
+}
+
+static __always_inline int PageAnonExclusive(const struct page *page)
 {
-	VM_BUG_ON(!PageSlab(page));
-	return PageActive(page);
+	VM_BUG_ON_PGFLAGS(!PageAnon(page), page);
+	/*
+	 * HugeTLB stores this information on the head page; THP keeps it per
+	 * page.
+	 */
+	if (PageHuge(page))
+		page = compound_head(page);
+	return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f);
 }
 
-static inline void SetPageSlabPfmemalloc(struct page *page)
+static __always_inline void SetPageAnonExclusive(struct page *page)
 {
-	VM_BUG_ON(!PageSlab(page));
-	SetPageActive(page);
+	VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page);
+	VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
+	set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f);
 }
 
-static inline void __ClearPageSlabPfmemalloc(struct page *page)
+static __always_inline void ClearPageAnonExclusive(struct page *page)
 {
-	VM_BUG_ON(!PageSlab(page));
-	__ClearPageActive(page);
+	VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page);
+	VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
+	clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f);
 }
 
-static inline void ClearPageSlabPfmemalloc(struct page *page)
+static __always_inline void __ClearPageAnonExclusive(struct page *page)
 {
-	VM_BUG_ON(!PageSlab(page));
-	ClearPageActive(page);
+	VM_BUG_ON_PGFLAGS(!PageAnon(page), page);
+	VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
+	__clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f);
 }
 
 #ifdef CONFIG_MMU
-#define __PG_MLOCKED		(1 << PG_mlocked)
+#define __PG_MLOCKED		(1UL << PG_mlocked)
 #else
 #define __PG_MLOCKED		0
 #endif
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define __PG_COMPOUND_LOCK	(1 << PG_compound_lock)
-#else
-#define __PG_COMPOUND_LOCK	0
-#endif
-
 /*
  * Flags checked when a page is freed.  Pages being freed should not have
- * these flags set.  It they are, there is a problem.
+ * these flags set.  If they are, there is a problem.
  */
-#define PAGE_FLAGS_CHECK_AT_FREE \
-	(1 << PG_lru	 | 1 << PG_locked    | \
-	 1 << PG_private | 1 << PG_private_2 | \
-	 1 << PG_writeback | 1 << PG_reserved | \
-	 1 << PG_slab	 | 1 << PG_swapcache | 1 << PG_active | \
-	 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
-	 __PG_COMPOUND_LOCK)
+#define PAGE_FLAGS_CHECK_AT_FREE \
+	(1UL << PG_lru		| 1UL << PG_locked	| \
+	 1UL << PG_private	| 1UL << PG_private_2	| \
+	 1UL << PG_writeback	| 1UL << PG_reserved	| \
+	 1UL << PG_active	| \
+	 1UL << PG_unevictable	| __PG_MLOCKED | LRU_GEN_MASK)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
- * Pages being prepped should not have any flags set.  It they are set,
+ * Pages being prepped should not have these flags set.  If they are set,
  * there has been a kernel bug or struct page corruption.
+ *
+ * __PG_HWPOISON is exceptional because it needs to be kept beyond page's
+ * alloc-free cycle to prevent reuse of the page.
  */
+#define PAGE_FLAGS_CHECK_AT_PREP \
+	((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK)
+
+/*
+ * Flags stored in the second page of a compound page.  They may overlap
+ * the CHECK_AT_FREE flags above, so need to be cleared.
+ */
-#define PAGE_FLAGS_CHECK_AT_PREP	((1 << NR_PAGEFLAGS) - 1)
+#define PAGE_FLAGS_SECOND \
+	(0xffUL /* order */	| 1UL << PG_has_hwpoisoned | \
+	 1UL << PG_large_rmappable | 1UL << PG_partially_mapped)
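
A sketch of how the free path uses these masks (hypothetical helper name, not from the tree; the real consumer is the free_pages_prepare() path in mm/page_alloc.c): any bit of PAGE_FLAGS_CHECK_AT_FREE still set when a page is handed back indicates a bug, while PAGE_FLAGS_SECOND can simply be cleared wholesale from the first tail page.

static inline int toy_page_ok_to_free(unsigned long flags)
{
	/* a freed page must have none of the CHECK_AT_FREE bits set */
	return (flags & PAGE_FLAGS_CHECK_AT_FREE) == 0;
}

static inline void toy_clear_second_page_flags(unsigned long *flags)
{
	/* order byte and the second-page flags go away in one store */
	*flags &= ~PAGE_FLAGS_SECOND;
}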
 
 #define PAGE_FLAGS_PRIVATE \
-	(1 << PG_private | 1 << PG_private_2)
+	(1UL << PG_private | 1UL << PG_private_2)
+
 /**
- * page_has_private - Determine if page has private stuff
- * @page: The page to be checked
+ * folio_has_private - Determine if folio has private stuff
+ * @folio: The folio to be checked
  *
- * Determine if a page has private stuff, indicating that release routines
+ * Determine if a folio has private stuff, indicating that release routines
  * should be invoked upon it.
  */
-static inline int page_has_private(struct page *page)
+static inline int folio_has_private(const struct folio *folio)
 {
-	return !!(page->flags & PAGE_FLAGS_PRIVATE);
+	return !!(folio->flags.f & PAGE_FLAGS_PRIVATE);
 }
 
+#undef PF_ANY
+#undef PF_HEAD
+#undef PF_NO_TAIL
+#undef PF_NO_COMPOUND
+#undef PF_SECOND
+
 #endif /* !__GENERATING_BOUNDS_H */
 
 #endif	/* PAGE_FLAGS_H */
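
Finally, a sketch of the folio_has_private() idiom from the last hunk (hypothetical caller; the real one is the filemap_release_folio() path): reclaim must let the owning filesystem drop its private state before a folio can be freed.

static inline bool toy_can_free_folio(struct folio *folio)
{
	if (folio_has_private(folio))
		return false;	/* fs must release its private data first */
	return true;
}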