summaryrefslogtreecommitdiff
path: root/include/linux/hugetlb.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/hugetlb.h')
-rw-r--r--include/linux/hugetlb.h592
1 files changed, 386 insertions, 206 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 551834cd5299..019a1c5281e4 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -2,31 +2,30 @@
#ifndef _LINUX_HUGETLB_H
#define _LINUX_HUGETLB_H
+#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/fs.h>
#include <linux/hugetlb_inline.h>
#include <linux/cgroup.h>
+#include <linux/page_ref.h>
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/pgtable.h>
#include <linux/gfp.h>
#include <linux/userfaultfd_k.h>
+#include <linux/nodemask.h>
struct ctl_table;
struct user_struct;
struct mmu_gather;
struct node;
-#ifndef CONFIG_ARCH_HAS_HUGEPD
-typedef struct { unsigned long pd; } hugepd_t;
-#define is_hugepd(hugepd) (0)
-#define __hugepd(x) ((hugepd_t) { (x) })
-#endif
+void free_huge_folio(struct folio *folio);
#ifdef CONFIG_HUGETLB_PAGE
-#include <linux/mempolicy.h>
+#include <linux/pagemap.h>
#include <linux/shm.h>
#include <asm/tlbflush.h>
@@ -56,6 +55,7 @@ struct resv_map {
long adds_in_progress;
struct list_head region_cache;
long region_cache_count;
+ struct rw_semaphore rw_sema;
#ifdef CONFIG_CGROUP_HUGETLB
/*
* On private mappings, the counter to uncharge reservations is stored
@@ -122,33 +122,19 @@ void hugepage_put_subpool(struct hugepage_subpool *spool);
void hugetlb_dup_vma_private(struct vm_area_struct *vma);
void clear_vma_resv_huge_pages(struct vm_area_struct *vma);
-int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *);
-int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *,
- loff_t *);
-int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *,
- loff_t *);
-int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *,
- loff_t *);
-
int move_hugetlb_page_tables(struct vm_area_struct *vma,
struct vm_area_struct *new_vma,
unsigned long old_addr, unsigned long new_addr,
unsigned long len);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
struct vm_area_struct *, struct vm_area_struct *);
-struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags);
-long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
- struct page **, struct vm_area_struct **,
- unsigned long *, unsigned long *, long, unsigned int,
- int *);
void unmap_hugepage_range(struct vm_area_struct *,
- unsigned long, unsigned long, struct page *,
- zap_flags_t);
-void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+ unsigned long start, unsigned long end,
+ struct folio *, zap_flags_t);
+void __unmap_hugepage_range(struct mmu_gather *tlb,
struct vm_area_struct *vma,
unsigned long start, unsigned long end,
- struct page *ref_page, zap_flags_t zap_flags);
+ struct folio *, zap_flags_t zap_flags);
void hugetlb_report_meminfo(struct seq_file *);
int hugetlb_report_node_meminfo(char *buf, int len, int nid);
void hugetlb_show_meminfo_node(int nid);
@@ -156,42 +142,101 @@ unsigned long hugetlb_total_pages(void);
vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags);
#ifdef CONFIG_USERFAULTFD
-int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
- struct vm_area_struct *dst_vma,
- unsigned long dst_addr,
- unsigned long src_addr,
- enum mcopy_atomic_mode mode,
- struct page **pagep,
- bool wp_copy);
+int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ uffd_flags_t flags,
+ struct folio **foliop);
#endif /* CONFIG_USERFAULTFD */
-bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
- struct vm_area_struct *vma,
- vm_flags_t vm_flags);
+long hugetlb_reserve_pages(struct inode *inode, long from, long to,
+ struct vm_area_desc *desc, vm_flags_t vm_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
-int isolate_hugetlb(struct page *page, struct list_head *list);
-int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison);
+bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
+int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared);
-void putback_active_hugepage(struct page *page);
+void folio_putback_hugetlb(struct folio *folio);
void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
-void free_huge_page(struct page *page);
void hugetlb_fix_reserve_counts(struct inode *inode);
extern struct mutex *hugetlb_fault_mutex_table;
u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);
pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pud_t *pud);
+bool hugetlbfs_pagecache_present(struct hstate *h,
+ struct vm_area_struct *vma,
+ unsigned long address);
+
+struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio);
-struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage);
+extern int sysctl_hugetlb_shm_group __read_mostly;
+extern struct list_head huge_boot_pages[MAX_NUMNODES];
-extern int sysctl_hugetlb_shm_group;
-extern struct list_head huge_boot_pages;
+void hugetlb_bootmem_alloc(void);
+bool hugetlb_bootmem_allocated(void);
+extern nodemask_t hugetlb_bootmem_nodes;
+void hugetlb_bootmem_set_nodes(void);
/* arch callbacks */
+#ifndef CONFIG_HIGHPTE
+/*
+ * pte_offset_huge() and pte_alloc_huge() are helpers for those architectures
+ * which may go down to the lowest PTE level in their huge_pte_offset() and
+ * huge_pte_alloc(): to avoid reliance on pte_offset_map() without pte_unmap().
+ */
+static inline pte_t *pte_offset_huge(pmd_t *pmd, unsigned long address)
+{
+ return pte_offset_kernel(pmd, address);
+}
+static inline pte_t *pte_alloc_huge(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long address)
+{
+ return pte_alloc(mm, pmd) ? NULL : pte_offset_huge(pmd, address);
+}
+#endif
+
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, unsigned long sz);
+/*
+ * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE.
+ * Returns the pte_t* if found, or NULL if the address is not mapped.
+ *
+ * IMPORTANT: we should normally not directly call this function, instead
+ * this is only a common interface to implement arch-specific
+ * walker. Please use hugetlb_walk() instead, because that will attempt to
+ * verify the locking for you.
+ *
+ * Since this function will walk all the pgtable pages (including not only
+ * high-level pgtable page, but also PUD entry that can be unshared
+ * concurrently for VM_SHARED), the caller of this function should be
+ * responsible of its thread safety. One can follow this rule:
+ *
+ * (1) For private mappings: pmd unsharing is not possible, so holding the
+ * mmap_lock for either read or write is sufficient. Most callers
+ * already hold the mmap_lock, so normally, no special action is
+ * required.
+ *
+ * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged
+ * pgtable page can go away from under us! It can be done by a pmd
+ * unshare with a follow up munmap() on the other process), then we
+ * need either:
+ *
+ * (2.1) hugetlb vma lock read or write held, to make sure pmd unshare
+ * won't happen upon the range (it also makes sure the pte_t we
+ * read is the right and stable one), or,
+ *
+ * (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make
+ * sure even if unshare happened the racy unmap() will wait until
+ * i_mmap_rwsem is released.
+ *
+ * Option (2.1) is the safest, which guarantees pte stability from pmd
+ * sharing pov, until the vma lock released. Option (2.2) doesn't protect
+ * a concurrent pmd unshare, but it makes sure the pgtable page is safe to
+ * access.
+ */
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
unsigned long hugetlb_mask_last_page(struct hstate *h);
@@ -200,6 +245,25 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end);
+extern void __hugetlb_zap_begin(struct vm_area_struct *vma,
+ unsigned long *begin, unsigned long *end);
+extern void __hugetlb_zap_end(struct vm_area_struct *vma,
+ struct zap_details *details);
+
+static inline void hugetlb_zap_begin(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
+{
+ if (is_vm_hugetlb_page(vma))
+ __hugetlb_zap_begin(vma, start, end);
+}
+
+static inline void hugetlb_zap_end(struct vm_area_struct *vma,
+ struct zap_details *details)
+{
+ if (is_vm_hugetlb_page(vma))
+ __hugetlb_zap_end(vma, details);
+}
+
void hugetlb_vma_lock_read(struct vm_area_struct *vma);
void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
void hugetlb_vma_lock_write(struct vm_area_struct *vma);
@@ -207,15 +271,13 @@ void hugetlb_vma_unlock_write(struct vm_area_struct *vma);
int hugetlb_vma_trylock_write(struct vm_area_struct *vma);
void hugetlb_vma_assert_locked(struct vm_area_struct *vma);
void hugetlb_vma_lock_release(struct kref *kref);
-
-int pmd_huge(pmd_t pmd);
-int pud_huge(pud_t pud);
-unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
+long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end, pgprot_t newprot,
unsigned long cp_flags);
-
-bool is_hugetlb_entry_migration(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void fixup_hugetlb_reservations(struct vm_area_struct *vma);
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
+int hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -232,8 +294,8 @@ static inline unsigned long hugetlb_total_pages(void)
return 0;
}
-static inline struct address_space *hugetlb_page_mapping_lock_write(
- struct page *hpage)
+static inline struct address_space *hugetlb_folio_mapping_lock_write(
+ struct folio *folio)
{
return NULL;
}
@@ -251,20 +313,16 @@ static inline void adjust_range_if_pmd_sharing_possible(
{
}
-static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags)
+static inline void hugetlb_zap_begin(
+ struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
{
- BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/
}
-static inline long follow_hugetlb_page(struct mm_struct *mm,
- struct vm_area_struct *vma, struct page **pages,
- struct vm_area_struct **vmas, unsigned long *position,
- unsigned long *nr_pages, long i, unsigned int flags,
- int *nonblocking)
+static inline void hugetlb_zap_end(
+ struct vm_area_struct *vma,
+ struct zap_details *details)
{
- BUG();
- return 0;
}
static inline int copy_hugetlb_page_range(struct mm_struct *dst,
@@ -299,12 +357,6 @@ static inline void hugetlb_show_meminfo_node(int nid)
{
}
-static inline int prepare_hugepage_range(struct file *file,
- unsigned long addr, unsigned long len)
-{
- return -EINVAL;
-}
-
static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
}
@@ -330,38 +382,19 @@ static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
}
-static inline int pmd_huge(pmd_t pmd)
-{
- return 0;
-}
-
-static inline int pud_huge(pud_t pud)
-{
- return 0;
-}
-
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr, unsigned long len)
{
return 0;
}
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- BUG();
-}
-
#ifdef CONFIG_USERFAULTFD
-static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
- pte_t *dst_pte,
- struct vm_area_struct *dst_vma,
- unsigned long dst_addr,
- unsigned long src_addr,
- enum mcopy_atomic_mode mode,
- struct page **pagep,
- bool wp_copy)
+static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ uffd_flags_t flags,
+ struct folio **foliop)
{
BUG();
return 0;
@@ -374,12 +407,12 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
return NULL;
}
-static inline int isolate_hugetlb(struct page *page, struct list_head *list)
+static inline bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list)
{
- return -EBUSY;
+ return false;
}
-static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
+static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison)
{
return 0;
}
@@ -390,7 +423,7 @@ static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
return 0;
}
-static inline void putback_active_hugepage(struct page *page)
+static inline void folio_putback_hugetlb(struct folio *folio)
{
}
@@ -399,7 +432,7 @@ static inline void move_hugetlb_state(struct folio *old_folio,
{
}
-static inline unsigned long hugetlb_change_protection(
+static inline long hugetlb_change_protection(
struct vm_area_struct *vma, unsigned long address,
unsigned long end, pgprot_t newprot,
unsigned long cp_flags)
@@ -407,9 +440,9 @@ static inline unsigned long hugetlb_change_protection(
return 0;
}
-static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start,
- unsigned long end, struct page *ref_page,
+ unsigned long end, struct folio *folio,
zap_flags_t zap_flags)
{
BUG();
@@ -425,17 +458,18 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+
+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
+
+static inline int hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
+{
+ return 0;
+}
+
#endif /* !CONFIG_HUGETLB_PAGE */
-/*
- * hugepages at page global directory. If arch support
- * hugepages at pgd level, they need to define this.
- */
-#ifndef pgd_huge
-#define pgd_huge(x) 0
-#endif
-#ifndef p4d_huge
-#define p4d_huge(x) 0
-#endif
#ifndef pgd_write
static inline int pgd_write(pgd_t pgd)
@@ -478,7 +512,6 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
}
struct hugetlbfs_inode_info {
- struct shared_policy policy;
struct inode vfs_inode;
unsigned int seals;
};
@@ -488,17 +521,13 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
}
-extern const struct file_operations hugetlbfs_file_operations;
extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
int creat_flags, int page_size_log);
-static inline bool is_file_hugepages(struct file *file)
+static inline bool is_file_hugepages(const struct file *file)
{
- if (file->f_op == &hugetlbfs_file_operations)
- return true;
-
- return is_file_shm_hugepages(file);
+ return file->f_op->fop_flags & FOP_HUGE_PAGES;
}
static inline struct hstate *hstate_inode(struct inode *i)
@@ -521,16 +550,10 @@ static inline struct hstate *hstate_inode(struct inode *i)
}
#endif /* !CONFIG_HUGETLBFS */
-#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff,
- unsigned long flags);
-#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
-
unsigned long
-generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff,
- unsigned long flags);
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
/*
* huegtlb page specific state flags. These flags are located in page.private
@@ -569,6 +592,7 @@ enum hugetlb_page_flags {
HPG_freed,
HPG_vmemmap_optimized,
HPG_raw_hwp_unreliable,
+ HPG_cma,
__NR_HPAGEFLAGS,
};
@@ -582,47 +606,35 @@ static __always_inline \
bool folio_test_hugetlb_##flname(struct folio *folio) \
{ void *private = &folio->private; \
return test_bit(HPG_##flname, private); \
- } \
-static inline int HPage##uname(struct page *page) \
- { return test_bit(HPG_##flname, &(page->private)); }
+ }
#define SETHPAGEFLAG(uname, flname) \
static __always_inline \
void folio_set_hugetlb_##flname(struct folio *folio) \
{ void *private = &folio->private; \
set_bit(HPG_##flname, private); \
- } \
-static inline void SetHPage##uname(struct page *page) \
- { set_bit(HPG_##flname, &(page->private)); }
+ }
#define CLEARHPAGEFLAG(uname, flname) \
static __always_inline \
void folio_clear_hugetlb_##flname(struct folio *folio) \
{ void *private = &folio->private; \
clear_bit(HPG_##flname, private); \
- } \
-static inline void ClearHPage##uname(struct page *page) \
- { clear_bit(HPG_##flname, &(page->private)); }
+ }
#else
#define TESTHPAGEFLAG(uname, flname) \
static inline bool \
folio_test_hugetlb_##flname(struct folio *folio) \
- { return 0; } \
-static inline int HPage##uname(struct page *page) \
{ return 0; }
#define SETHPAGEFLAG(uname, flname) \
static inline void \
folio_set_hugetlb_##flname(struct folio *folio) \
- { } \
-static inline void SetHPage##uname(struct page *page) \
{ }
#define CLEARHPAGEFLAG(uname, flname) \
static inline void \
folio_clear_hugetlb_##flname(struct folio *folio) \
- { } \
-static inline void ClearHPage##uname(struct page *page) \
{ }
#endif
@@ -640,6 +652,7 @@ HPAGEFLAG(Temporary, temporary)
HPAGEFLAG(Freed, freed)
HPAGEFLAG(VmemmapOptimized, vmemmap_optimized)
HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)
+HPAGEFLAG(Cma, cma)
#ifdef CONFIG_HUGETLB_PAGE
@@ -647,6 +660,7 @@ HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)
/* Defines one hugetlb page size */
struct hstate {
struct mutex resize_lock;
+ struct lock_class_key resize_key;
int next_nid_to_alloc;
int next_nid_to_free;
unsigned int order;
@@ -664,30 +678,39 @@ struct hstate {
unsigned int nr_huge_pages_node[MAX_NUMNODES];
unsigned int free_huge_pages_node[MAX_NUMNODES];
unsigned int surplus_huge_pages_node[MAX_NUMNODES];
-#ifdef CONFIG_CGROUP_HUGETLB
- /* cgroup control files */
- struct cftype cgroup_files_dfl[8];
- struct cftype cgroup_files_legacy[10];
-#endif
char name[HSTATE_NAME_LEN];
};
+struct cma;
+
struct huge_bootmem_page {
struct list_head list;
struct hstate *hstate;
+ unsigned long flags;
+ struct cma *cma;
};
-int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
-struct page *alloc_huge_page(struct vm_area_struct *vma,
- unsigned long addr, int avoid_reserve);
-struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
- nodemask_t *nmask, gfp_t gfp_mask);
-struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
- unsigned long address);
-int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping,
+#define HUGE_BOOTMEM_HVO 0x0001
+#define HUGE_BOOTMEM_ZONES_VALID 0x0002
+#define HUGE_BOOTMEM_CMA 0x0004
+
+bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m);
+
+int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list);
+int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+void wait_for_freed_hugetlb_folios(void);
+struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
+ unsigned long addr, bool cow_from_owner);
+struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
+ nodemask_t *nmask, gfp_t gfp_mask,
+ bool allow_alloc_fallback);
+struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
+ nodemask_t *nmask, gfp_t gfp_mask);
+
+int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
- unsigned long address, struct page *page);
+ unsigned long address, struct folio *folio);
/* arch callback */
int __init __alloc_bootmem_huge_page(struct hstate *h, int nid);
@@ -707,17 +730,14 @@ extern unsigned int default_hstate_idx;
#define default_hstate (hstates[default_hstate_idx])
-static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
+static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
{
- return folio->_hugetlb_subpool;
+ return HUGETLBFS_SB(inode->i_sb)->spool;
}
-/*
- * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool
- */
-static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
+static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
- return hugetlb_folio_subpool(page_folio(hpage));
+ return folio->_hugetlb_subpool;
}
static inline void hugetlb_set_folio_subpool(struct folio *folio,
@@ -726,12 +746,6 @@ static inline void hugetlb_set_folio_subpool(struct folio *folio,
folio->_hugetlb_subpool = subpool;
}
-static inline void hugetlb_set_page_subpool(struct page *hpage,
- struct hugepage_subpool *subpool)
-{
- hugetlb_set_folio_subpool(page_folio(hpage), subpool);
-}
-
static inline struct hstate *hstate_file(struct file *f)
{
return hstate_inode(file_inode(f));
@@ -742,7 +756,10 @@ static inline struct hstate *hstate_sizelog(int page_size_log)
if (!page_size_log)
return &default_hstate;
- return size_to_hstate(1UL << page_size_log);
+ if (page_size_log < BITS_PER_LONG)
+ return size_to_hstate(1UL << page_size_log);
+
+ return NULL;
}
static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
@@ -774,9 +791,14 @@ static inline unsigned huge_page_shift(struct hstate *h)
return h->order + PAGE_SHIFT;
}
+static inline bool order_is_gigantic(unsigned int order)
+{
+ return order > MAX_PAGE_ORDER;
+}
+
static inline bool hstate_is_gigantic(struct hstate *h)
{
- return huge_page_order(h) >= MAX_ORDER;
+ return order_is_gigantic(huge_page_order(h));
}
static inline unsigned int pages_per_huge_page(const struct hstate *h)
@@ -789,6 +811,12 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h)
return huge_page_size(h) / 512;
}
+static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
+ struct address_space *mapping, pgoff_t idx)
+{
+ return filemap_lock_folio(mapping, idx << huge_page_order(h));
+}
+
#include <asm/hugetlb.h>
#ifndef is_hugepage_only_range
@@ -800,9 +828,9 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
#define is_hugepage_only_range is_hugepage_only_range
#endif
-#ifndef arch_clear_hugepage_flags
-static inline void arch_clear_hugepage_flags(struct page *page) { }
-#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+#ifndef arch_clear_hugetlb_flags
+static inline void arch_clear_hugetlb_flags(struct folio *folio) { }
+#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
#endif
#ifndef arch_make_huge_pte
@@ -813,15 +841,21 @@ static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
}
#endif
-static inline struct hstate *folio_hstate(struct folio *folio)
+#ifndef arch_has_huge_bootmem_alloc
+/*
+ * Some architectures do their own bootmem allocation, so they can't use
+ * early CMA allocation.
+ */
+static inline bool arch_has_huge_bootmem_alloc(void)
{
- VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
- return size_to_hstate(folio_size(folio));
+ return false;
}
+#endif
-static inline struct hstate *page_hstate(struct page *page)
+static inline struct hstate *folio_hstate(struct folio *folio)
{
- return folio_hstate(page_folio(page));
+ VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
+ return size_to_hstate(folio_size(folio));
}
static inline unsigned hstate_index_to_shift(unsigned index)
@@ -834,14 +868,14 @@ static inline int hstate_index(struct hstate *h)
return h - hstates;
}
-extern int dissolve_free_huge_page(struct page *page);
-extern int dissolve_free_huge_pages(unsigned long start_pfn,
+int dissolve_free_hugetlb_folio(struct folio *folio);
+int dissolve_free_hugetlb_folios(unsigned long start_pfn,
unsigned long end_pfn);
#ifdef CONFIG_MEMORY_FAILURE
-extern void hugetlb_clear_page_hwpoison(struct page *hpage);
+extern void folio_clear_hugetlb_hwpoison(struct folio *folio);
#else
-static inline void hugetlb_clear_page_hwpoison(struct page *hpage)
+static inline void folio_clear_hugetlb_hwpoison(struct folio *folio)
{
}
#endif
@@ -898,10 +932,11 @@ static inline bool hugepage_movable_supported(struct hstate *h)
/* Movability of hugepages depends on migration support. */
static inline gfp_t htlb_alloc_mask(struct hstate *h)
{
- if (hugepage_movable_supported(h))
- return GFP_HIGHUSER_MOVABLE;
- else
- return GFP_HIGHUSER;
+ gfp_t gfp = __GFP_COMP | __GFP_NOWARN;
+
+ gfp |= hugepage_movable_supported(h) ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER;
+
+ return gfp;
}
static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
@@ -916,13 +951,64 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
return modified_mask;
}
+static inline bool htlb_allow_alloc_fallback(int reason)
+{
+ bool allowed_fallback = false;
+
+ /*
+ * Note: the memory offline, memory failure and migration syscalls will
+ * be allowed to fallback to other nodes due to lack of a better chioce,
+ * that might break the per-node hugetlb pool. While other cases will
+ * set the __GFP_THISNODE to avoid breaking the per-node hugetlb pool.
+ */
+ switch (reason) {
+ case MR_MEMORY_HOTPLUG:
+ case MR_MEMORY_FAILURE:
+ case MR_SYSCALL:
+ case MR_MEMPOLICY_MBIND:
+ allowed_fallback = true;
+ break;
+ default:
+ break;
+ }
+
+ return allowed_fallback;
+}
+
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
- if (huge_page_size(h) == PMD_SIZE)
+ const unsigned long size = huge_page_size(h);
+
+ VM_WARN_ON(size == PAGE_SIZE);
+
+ /*
+ * hugetlb must use the exact same PT locks as core-mm page table
+ * walkers would. When modifying a PTE table, hugetlb must take the
+ * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD
+ * PT lock etc.
+ *
+ * The expectation is that any hugetlb folio smaller than a PMD is
+ * always mapped into a single PTE table and that any hugetlb folio
+ * smaller than a PUD (but at least as big as a PMD) is always mapped
+ * into a single PMD table.
+ *
+ * If that does not hold for an architecture, then that architecture
+ * must disable split PT locks such that all *_lockptr() functions
+ * will give us the same result: the per-MM PT lock.
+ *
+ * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where
+ * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr()
+ * and core-mm would use pmd_lockptr(). However, in such configurations
+ * split PMD locks are disabled -- they don't make sense on a single
+ * PGDIR page table -- and the end result is the same.
+ */
+ if (size >= PUD_SIZE)
+ return pud_lockptr(mm, (pud_t *) pte);
+ else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE))
return pmd_lockptr(mm, (pmd_t *) pte);
- VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
- return &mm->page_table_lock;
+ /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */
+ return ptep_lockptr(mm, pte);
}
#ifndef hugepages_supported
@@ -956,7 +1042,9 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ unsigned long psize = huge_page_size(hstate_vma(vma));
+
+ return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize);
}
#endif
@@ -966,7 +1054,9 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t old_pte, pte_t pte)
{
- set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+ unsigned long psize = huge_page_size(hstate_vma(vma));
+
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize);
}
#endif
@@ -975,42 +1065,69 @@ void hugetlb_register_node(struct node *node);
void hugetlb_unregister_node(struct node *node);
#endif
+/*
+ * Check if a given raw @page in a hugepage is HWPOISON.
+ */
+bool is_raw_hwpoison_page_in_hugepage(struct page *page);
+
+static inline unsigned long huge_page_mask_align(struct file *file)
+{
+ return PAGE_MASK & ~huge_page_mask(hstate_file(file));
+}
+
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};
+static inline unsigned long huge_page_mask_align(struct file *file)
+{
+ return 0;
+}
+
static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
return NULL;
}
-static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
+static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
+ struct address_space *mapping, pgoff_t idx)
{
return NULL;
}
-static inline int isolate_or_dissolve_huge_page(struct page *page,
+static inline int isolate_or_dissolve_huge_folio(struct folio *folio,
struct list_head *list)
{
return -ENOMEM;
}
-static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
+static inline int replace_free_hugepage_folios(unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ return 0;
+}
+
+static inline void wait_for_freed_hugetlb_folios(void)
+{
+}
+
+static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
- int avoid_reserve)
+ bool cow_from_owner)
{
return NULL;
}
-static inline struct page *
-alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
- nodemask_t *nmask, gfp_t gfp_mask)
+static inline struct folio *
+alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
+ nodemask_t *nmask, gfp_t gfp_mask)
{
return NULL;
}
-static inline struct page *alloc_huge_page_vma(struct hstate *h,
- struct vm_area_struct *vma,
- unsigned long address)
+static inline struct folio *
+alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
+ nodemask_t *nmask, gfp_t gfp_mask,
+ bool allow_alloc_fallback)
{
return NULL;
}
@@ -1040,11 +1157,6 @@ static inline struct hstate *folio_hstate(struct folio *folio)
return NULL;
}
-static inline struct hstate *page_hstate(struct page *page)
-{
- return NULL;
-}
-
static inline struct hstate *size_to_hstate(unsigned long size)
{
return NULL;
@@ -1100,12 +1212,12 @@ static inline int hstate_index(struct hstate *h)
return 0;
}
-static inline int dissolve_free_huge_page(struct page *page)
+static inline int dissolve_free_hugetlb_folio(struct folio *folio)
{
return 0;
}
-static inline int dissolve_free_huge_pages(unsigned long start_pfn,
+static inline int dissolve_free_hugetlb_folios(unsigned long start_pfn,
unsigned long end_pfn)
{
return 0;
@@ -1131,6 +1243,11 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
return 0;
}
+static inline bool htlb_allow_alloc_fallback(int reason)
+{
+ return false;
+}
+
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
@@ -1152,11 +1269,15 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
+#ifdef CONFIG_MMU
+ return ptep_get(ptep);
+#else
return *ptep;
+#endif
}
static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+ pte_t *ptep, pte_t pte, unsigned long sz)
{
}
@@ -1167,6 +1288,21 @@ static inline void hugetlb_register_node(struct node *node)
static inline void hugetlb_unregister_node(struct node *node)
{
}
+
+static inline bool hugetlbfs_pagecache_present(
+ struct hstate *h, struct vm_area_struct *vma, unsigned long address)
+{
+ return false;
+}
+
+static inline void hugetlb_bootmem_alloc(void)
+{
+}
+
+static inline bool hugetlb_bootmem_allocated(void)
+{
+ return false;
+}
#endif /* CONFIG_HUGETLB_PAGE */
static inline spinlock_t *huge_pte_lock(struct hstate *h,
@@ -1187,6 +1323,18 @@ static inline __init void hugetlb_cma_reserve(int order)
}
#endif
+#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
+static inline bool hugetlb_pmd_shared(pte_t *pte)
+{
+ return page_count(virt_to_page(pte)) > 1;
+}
+#else
+static inline bool hugetlb_pmd_shared(pte_t *pte)
+{
+ return false;
+}
+#endif
+
bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
@@ -1197,4 +1345,36 @@ bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
#endif
+static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
+{
+ return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
+}
+
+bool __vma_private_lock(struct vm_area_struct *vma);
+
+/*
+ * Safe version of huge_pte_offset() to check the locks. See comments
+ * above huge_pte_offset().
+ */
+static inline pte_t *
+hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz)
+{
+#if defined(CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING) && defined(CONFIG_LOCKDEP)
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ /*
+ * If pmd sharing possible, locking needed to safely walk the
+ * hugetlb pgtables. More information can be found at the comment
+ * above huge_pte_offset() in the same file.
+ *
+ * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP.
+ */
+ if (__vma_shareable_lock(vma))
+ WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) &&
+ !lockdep_is_held(
+ &vma->vm_file->f_mapping->i_mmap_rwsem));
+#endif
+ return huge_pte_offset(vma->vm_mm, addr, sz);
+}
+
#endif /* _LINUX_HUGETLB_H */