diff options
Diffstat (limited to 'include/linux/hugetlb.h')
| -rw-r--r-- | include/linux/hugetlb.h | 592 |
1 files changed, 386 insertions, 206 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 551834cd5299..019a1c5281e4 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -2,31 +2,30 @@ #ifndef _LINUX_HUGETLB_H #define _LINUX_HUGETLB_H +#include <linux/mm.h> #include <linux/mm_types.h> #include <linux/mmdebug.h> #include <linux/fs.h> #include <linux/hugetlb_inline.h> #include <linux/cgroup.h> +#include <linux/page_ref.h> #include <linux/list.h> #include <linux/kref.h> #include <linux/pgtable.h> #include <linux/gfp.h> #include <linux/userfaultfd_k.h> +#include <linux/nodemask.h> struct ctl_table; struct user_struct; struct mmu_gather; struct node; -#ifndef CONFIG_ARCH_HAS_HUGEPD -typedef struct { unsigned long pd; } hugepd_t; -#define is_hugepd(hugepd) (0) -#define __hugepd(x) ((hugepd_t) { (x) }) -#endif +void free_huge_folio(struct folio *folio); #ifdef CONFIG_HUGETLB_PAGE -#include <linux/mempolicy.h> +#include <linux/pagemap.h> #include <linux/shm.h> #include <asm/tlbflush.h> @@ -56,6 +55,7 @@ struct resv_map { long adds_in_progress; struct list_head region_cache; long region_cache_count; + struct rw_semaphore rw_sema; #ifdef CONFIG_CGROUP_HUGETLB /* * On private mappings, the counter to uncharge reservations is stored @@ -122,33 +122,19 @@ void hugepage_put_subpool(struct hugepage_subpool *spool); void hugetlb_dup_vma_private(struct vm_area_struct *vma); void clear_vma_resv_huge_pages(struct vm_area_struct *vma); -int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); -int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *, - loff_t *); -int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *, - loff_t *); -int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *, - loff_t *); - int move_hugetlb_page_tables(struct vm_area_struct *vma, struct vm_area_struct *new_vma, unsigned long old_addr, unsigned long new_addr, unsigned long len); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *, struct vm_area_struct *); -struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, - unsigned long address, unsigned int flags); -long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, - struct page **, struct vm_area_struct **, - unsigned long *, unsigned long *, long, unsigned int, - int *); void unmap_hugepage_range(struct vm_area_struct *, - unsigned long, unsigned long, struct page *, - zap_flags_t); -void __unmap_hugepage_range_final(struct mmu_gather *tlb, + unsigned long start, unsigned long end, + struct folio *, zap_flags_t); +void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, - struct page *ref_page, zap_flags_t zap_flags); + struct folio *, zap_flags_t zap_flags); void hugetlb_report_meminfo(struct seq_file *); int hugetlb_report_node_meminfo(char *buf, int len, int nid); void hugetlb_show_meminfo_node(int nid); @@ -156,42 +142,101 @@ unsigned long hugetlb_total_pages(void); vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); #ifdef CONFIG_USERFAULTFD -int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, - struct vm_area_struct *dst_vma, - unsigned long dst_addr, - unsigned long src_addr, - enum mcopy_atomic_mode mode, - struct page **pagep, - bool wp_copy); +int hugetlb_mfill_atomic_pte(pte_t *dst_pte, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + uffd_flags_t flags, + struct folio **foliop); #endif /* CONFIG_USERFAULTFD */ -bool hugetlb_reserve_pages(struct inode *inode, long from, long to, - struct vm_area_struct *vma, - vm_flags_t vm_flags); +long hugetlb_reserve_pages(struct inode *inode, long from, long to, + struct vm_area_desc *desc, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); -int isolate_hugetlb(struct page *page, struct list_head *list); -int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison); +bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list); +int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); -void putback_active_hugepage(struct page *page); +void folio_putback_hugetlb(struct folio *folio); void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); -void free_huge_page(struct page *page); void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pud_t *pud); +bool hugetlbfs_pagecache_present(struct hstate *h, + struct vm_area_struct *vma, + unsigned long address); + +struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio); -struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage); +extern int sysctl_hugetlb_shm_group __read_mostly; +extern struct list_head huge_boot_pages[MAX_NUMNODES]; -extern int sysctl_hugetlb_shm_group; -extern struct list_head huge_boot_pages; +void hugetlb_bootmem_alloc(void); +bool hugetlb_bootmem_allocated(void); +extern nodemask_t hugetlb_bootmem_nodes; +void hugetlb_bootmem_set_nodes(void); /* arch callbacks */ +#ifndef CONFIG_HIGHPTE +/* + * pte_offset_huge() and pte_alloc_huge() are helpers for those architectures + * which may go down to the lowest PTE level in their huge_pte_offset() and + * huge_pte_alloc(): to avoid reliance on pte_offset_map() without pte_unmap(). + */ +static inline pte_t *pte_offset_huge(pmd_t *pmd, unsigned long address) +{ + return pte_offset_kernel(pmd, address); +} +static inline pte_t *pte_alloc_huge(struct mm_struct *mm, pmd_t *pmd, + unsigned long address) +{ + return pte_alloc(mm, pmd) ? NULL : pte_offset_huge(pmd, address); +} +#endif + pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz); +/* + * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE. + * Returns the pte_t* if found, or NULL if the address is not mapped. + * + * IMPORTANT: we should normally not directly call this function, instead + * this is only a common interface to implement arch-specific + * walker. Please use hugetlb_walk() instead, because that will attempt to + * verify the locking for you. + * + * Since this function will walk all the pgtable pages (including not only + * high-level pgtable page, but also PUD entry that can be unshared + * concurrently for VM_SHARED), the caller of this function should be + * responsible of its thread safety. One can follow this rule: + * + * (1) For private mappings: pmd unsharing is not possible, so holding the + * mmap_lock for either read or write is sufficient. Most callers + * already hold the mmap_lock, so normally, no special action is + * required. + * + * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged + * pgtable page can go away from under us! It can be done by a pmd + * unshare with a follow up munmap() on the other process), then we + * need either: + * + * (2.1) hugetlb vma lock read or write held, to make sure pmd unshare + * won't happen upon the range (it also makes sure the pte_t we + * read is the right and stable one), or, + * + * (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make + * sure even if unshare happened the racy unmap() will wait until + * i_mmap_rwsem is released. + * + * Option (2.1) is the safest, which guarantees pte stability from pmd + * sharing pov, until the vma lock released. Option (2.2) doesn't protect + * a concurrent pmd unshare, but it makes sure the pgtable page is safe to + * access. + */ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); unsigned long hugetlb_mask_last_page(struct hstate *h); @@ -200,6 +245,25 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); +extern void __hugetlb_zap_begin(struct vm_area_struct *vma, + unsigned long *begin, unsigned long *end); +extern void __hugetlb_zap_end(struct vm_area_struct *vma, + struct zap_details *details); + +static inline void hugetlb_zap_begin(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ + if (is_vm_hugetlb_page(vma)) + __hugetlb_zap_begin(vma, start, end); +} + +static inline void hugetlb_zap_end(struct vm_area_struct *vma, + struct zap_details *details) +{ + if (is_vm_hugetlb_page(vma)) + __hugetlb_zap_end(vma, details); +} + void hugetlb_vma_lock_read(struct vm_area_struct *vma); void hugetlb_vma_unlock_read(struct vm_area_struct *vma); void hugetlb_vma_lock_write(struct vm_area_struct *vma); @@ -207,15 +271,13 @@ void hugetlb_vma_unlock_write(struct vm_area_struct *vma); int hugetlb_vma_trylock_write(struct vm_area_struct *vma); void hugetlb_vma_assert_locked(struct vm_area_struct *vma); void hugetlb_vma_lock_release(struct kref *kref); - -int pmd_huge(pmd_t pmd); -int pud_huge(pud_t pud); -unsigned long hugetlb_change_protection(struct vm_area_struct *vma, +long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot, unsigned long cp_flags); - -bool is_hugetlb_entry_migration(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); +void fixup_hugetlb_reservations(struct vm_area_struct *vma); +void hugetlb_split(struct vm_area_struct *vma, unsigned long addr); +int hugetlb_vma_lock_alloc(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ @@ -232,8 +294,8 @@ static inline unsigned long hugetlb_total_pages(void) return 0; } -static inline struct address_space *hugetlb_page_mapping_lock_write( - struct page *hpage) +static inline struct address_space *hugetlb_folio_mapping_lock_write( + struct folio *folio) { return NULL; } @@ -251,20 +313,16 @@ static inline void adjust_range_if_pmd_sharing_possible( { } -static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, - unsigned long address, unsigned int flags) +static inline void hugetlb_zap_begin( + struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) { - BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/ } -static inline long follow_hugetlb_page(struct mm_struct *mm, - struct vm_area_struct *vma, struct page **pages, - struct vm_area_struct **vmas, unsigned long *position, - unsigned long *nr_pages, long i, unsigned int flags, - int *nonblocking) +static inline void hugetlb_zap_end( + struct vm_area_struct *vma, + struct zap_details *details) { - BUG(); - return 0; } static inline int copy_hugetlb_page_range(struct mm_struct *dst, @@ -299,12 +357,6 @@ static inline void hugetlb_show_meminfo_node(int nid) { } -static inline int prepare_hugepage_range(struct file *file, - unsigned long addr, unsigned long len) -{ - return -EINVAL; -} - static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma) { } @@ -330,38 +382,19 @@ static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma) { } -static inline int pmd_huge(pmd_t pmd) -{ - return 0; -} - -static inline int pud_huge(pud_t pud) -{ - return 0; -} - static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { return 0; } -static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, - unsigned long addr, unsigned long end, - unsigned long floor, unsigned long ceiling) -{ - BUG(); -} - #ifdef CONFIG_USERFAULTFD -static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, - pte_t *dst_pte, - struct vm_area_struct *dst_vma, - unsigned long dst_addr, - unsigned long src_addr, - enum mcopy_atomic_mode mode, - struct page **pagep, - bool wp_copy) +static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + uffd_flags_t flags, + struct folio **foliop) { BUG(); return 0; @@ -374,12 +407,12 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, return NULL; } -static inline int isolate_hugetlb(struct page *page, struct list_head *list) +static inline bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list) { - return -EBUSY; + return false; } -static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison) +static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison) { return 0; } @@ -390,7 +423,7 @@ static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags, return 0; } -static inline void putback_active_hugepage(struct page *page) +static inline void folio_putback_hugetlb(struct folio *folio) { } @@ -399,7 +432,7 @@ static inline void move_hugetlb_state(struct folio *old_folio, { } -static inline unsigned long hugetlb_change_protection( +static inline long hugetlb_change_protection( struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot, unsigned long cp_flags) @@ -407,9 +440,9 @@ static inline unsigned long hugetlb_change_protection( return 0; } -static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb, +static inline void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, - unsigned long end, struct page *ref_page, + unsigned long end, struct folio *folio, zap_flags_t zap_flags) { BUG(); @@ -425,17 +458,18 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ +} + +static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {} + +static inline int hugetlb_vma_lock_alloc(struct vm_area_struct *vma) +{ + return 0; +} + #endif /* !CONFIG_HUGETLB_PAGE */ -/* - * hugepages at page global directory. If arch support - * hugepages at pgd level, they need to define this. - */ -#ifndef pgd_huge -#define pgd_huge(x) 0 -#endif -#ifndef p4d_huge -#define p4d_huge(x) 0 -#endif #ifndef pgd_write static inline int pgd_write(pgd_t pgd) @@ -478,7 +512,6 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) } struct hugetlbfs_inode_info { - struct shared_policy policy; struct inode vfs_inode; unsigned int seals; }; @@ -488,17 +521,13 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); } -extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, int creat_flags, int page_size_log); -static inline bool is_file_hugepages(struct file *file) +static inline bool is_file_hugepages(const struct file *file) { - if (file->f_op == &hugetlbfs_file_operations) - return true; - - return is_file_shm_hugepages(file); + return file->f_op->fop_flags & FOP_HUGE_PAGES; } static inline struct hstate *hstate_inode(struct inode *i) @@ -521,16 +550,10 @@ static inline struct hstate *hstate_inode(struct inode *i) } #endif /* !CONFIG_HUGETLBFS */ -#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA -unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); -#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */ - unsigned long -generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); /* * huegtlb page specific state flags. These flags are located in page.private @@ -569,6 +592,7 @@ enum hugetlb_page_flags { HPG_freed, HPG_vmemmap_optimized, HPG_raw_hwp_unreliable, + HPG_cma, __NR_HPAGEFLAGS, }; @@ -582,47 +606,35 @@ static __always_inline \ bool folio_test_hugetlb_##flname(struct folio *folio) \ { void *private = &folio->private; \ return test_bit(HPG_##flname, private); \ - } \ -static inline int HPage##uname(struct page *page) \ - { return test_bit(HPG_##flname, &(page->private)); } + } #define SETHPAGEFLAG(uname, flname) \ static __always_inline \ void folio_set_hugetlb_##flname(struct folio *folio) \ { void *private = &folio->private; \ set_bit(HPG_##flname, private); \ - } \ -static inline void SetHPage##uname(struct page *page) \ - { set_bit(HPG_##flname, &(page->private)); } + } #define CLEARHPAGEFLAG(uname, flname) \ static __always_inline \ void folio_clear_hugetlb_##flname(struct folio *folio) \ { void *private = &folio->private; \ clear_bit(HPG_##flname, private); \ - } \ -static inline void ClearHPage##uname(struct page *page) \ - { clear_bit(HPG_##flname, &(page->private)); } + } #else #define TESTHPAGEFLAG(uname, flname) \ static inline bool \ folio_test_hugetlb_##flname(struct folio *folio) \ - { return 0; } \ -static inline int HPage##uname(struct page *page) \ { return 0; } #define SETHPAGEFLAG(uname, flname) \ static inline void \ folio_set_hugetlb_##flname(struct folio *folio) \ - { } \ -static inline void SetHPage##uname(struct page *page) \ { } #define CLEARHPAGEFLAG(uname, flname) \ static inline void \ folio_clear_hugetlb_##flname(struct folio *folio) \ - { } \ -static inline void ClearHPage##uname(struct page *page) \ { } #endif @@ -640,6 +652,7 @@ HPAGEFLAG(Temporary, temporary) HPAGEFLAG(Freed, freed) HPAGEFLAG(VmemmapOptimized, vmemmap_optimized) HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable) +HPAGEFLAG(Cma, cma) #ifdef CONFIG_HUGETLB_PAGE @@ -647,6 +660,7 @@ HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable) /* Defines one hugetlb page size */ struct hstate { struct mutex resize_lock; + struct lock_class_key resize_key; int next_nid_to_alloc; int next_nid_to_free; unsigned int order; @@ -664,30 +678,39 @@ struct hstate { unsigned int nr_huge_pages_node[MAX_NUMNODES]; unsigned int free_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES]; -#ifdef CONFIG_CGROUP_HUGETLB - /* cgroup control files */ - struct cftype cgroup_files_dfl[8]; - struct cftype cgroup_files_legacy[10]; -#endif char name[HSTATE_NAME_LEN]; }; +struct cma; + struct huge_bootmem_page { struct list_head list; struct hstate *hstate; + unsigned long flags; + struct cma *cma; }; -int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); -struct page *alloc_huge_page(struct vm_area_struct *vma, - unsigned long addr, int avoid_reserve); -struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, - nodemask_t *nmask, gfp_t gfp_mask); -struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, - unsigned long address); -int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping, +#define HUGE_BOOTMEM_HVO 0x0001 +#define HUGE_BOOTMEM_ZONES_VALID 0x0002 +#define HUGE_BOOTMEM_CMA 0x0004 + +bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m); + +int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list); +int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn); +void wait_for_freed_hugetlb_folios(void); +struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, + unsigned long addr, bool cow_from_owner); +struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask, + bool allow_alloc_fallback); +struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask); + +int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, - unsigned long address, struct page *page); + unsigned long address, struct folio *folio); /* arch callback */ int __init __alloc_bootmem_huge_page(struct hstate *h, int nid); @@ -707,17 +730,14 @@ extern unsigned int default_hstate_idx; #define default_hstate (hstates[default_hstate_idx]) -static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) +static inline struct hugepage_subpool *subpool_inode(struct inode *inode) { - return folio->_hugetlb_subpool; + return HUGETLBFS_SB(inode->i_sb)->spool; } -/* - * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool - */ -static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) +static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) { - return hugetlb_folio_subpool(page_folio(hpage)); + return folio->_hugetlb_subpool; } static inline void hugetlb_set_folio_subpool(struct folio *folio, @@ -726,12 +746,6 @@ static inline void hugetlb_set_folio_subpool(struct folio *folio, folio->_hugetlb_subpool = subpool; } -static inline void hugetlb_set_page_subpool(struct page *hpage, - struct hugepage_subpool *subpool) -{ - hugetlb_set_folio_subpool(page_folio(hpage), subpool); -} - static inline struct hstate *hstate_file(struct file *f) { return hstate_inode(file_inode(f)); @@ -742,7 +756,10 @@ static inline struct hstate *hstate_sizelog(int page_size_log) if (!page_size_log) return &default_hstate; - return size_to_hstate(1UL << page_size_log); + if (page_size_log < BITS_PER_LONG) + return size_to_hstate(1UL << page_size_log); + + return NULL; } static inline struct hstate *hstate_vma(struct vm_area_struct *vma) @@ -774,9 +791,14 @@ static inline unsigned huge_page_shift(struct hstate *h) return h->order + PAGE_SHIFT; } +static inline bool order_is_gigantic(unsigned int order) +{ + return order > MAX_PAGE_ORDER; +} + static inline bool hstate_is_gigantic(struct hstate *h) { - return huge_page_order(h) >= MAX_ORDER; + return order_is_gigantic(huge_page_order(h)); } static inline unsigned int pages_per_huge_page(const struct hstate *h) @@ -789,6 +811,12 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h) return huge_page_size(h) / 512; } +static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, + struct address_space *mapping, pgoff_t idx) +{ + return filemap_lock_folio(mapping, idx << huge_page_order(h)); +} + #include <asm/hugetlb.h> #ifndef is_hugepage_only_range @@ -800,9 +828,9 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, #define is_hugepage_only_range is_hugepage_only_range #endif -#ifndef arch_clear_hugepage_flags -static inline void arch_clear_hugepage_flags(struct page *page) { } -#define arch_clear_hugepage_flags arch_clear_hugepage_flags +#ifndef arch_clear_hugetlb_flags +static inline void arch_clear_hugetlb_flags(struct folio *folio) { } +#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags #endif #ifndef arch_make_huge_pte @@ -813,15 +841,21 @@ static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, } #endif -static inline struct hstate *folio_hstate(struct folio *folio) +#ifndef arch_has_huge_bootmem_alloc +/* + * Some architectures do their own bootmem allocation, so they can't use + * early CMA allocation. + */ +static inline bool arch_has_huge_bootmem_alloc(void) { - VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); - return size_to_hstate(folio_size(folio)); + return false; } +#endif -static inline struct hstate *page_hstate(struct page *page) +static inline struct hstate *folio_hstate(struct folio *folio) { - return folio_hstate(page_folio(page)); + VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); + return size_to_hstate(folio_size(folio)); } static inline unsigned hstate_index_to_shift(unsigned index) @@ -834,14 +868,14 @@ static inline int hstate_index(struct hstate *h) return h - hstates; } -extern int dissolve_free_huge_page(struct page *page); -extern int dissolve_free_huge_pages(unsigned long start_pfn, +int dissolve_free_hugetlb_folio(struct folio *folio); +int dissolve_free_hugetlb_folios(unsigned long start_pfn, unsigned long end_pfn); #ifdef CONFIG_MEMORY_FAILURE -extern void hugetlb_clear_page_hwpoison(struct page *hpage); +extern void folio_clear_hugetlb_hwpoison(struct folio *folio); #else -static inline void hugetlb_clear_page_hwpoison(struct page *hpage) +static inline void folio_clear_hugetlb_hwpoison(struct folio *folio) { } #endif @@ -898,10 +932,11 @@ static inline bool hugepage_movable_supported(struct hstate *h) /* Movability of hugepages depends on migration support. */ static inline gfp_t htlb_alloc_mask(struct hstate *h) { - if (hugepage_movable_supported(h)) - return GFP_HIGHUSER_MOVABLE; - else - return GFP_HIGHUSER; + gfp_t gfp = __GFP_COMP | __GFP_NOWARN; + + gfp |= hugepage_movable_supported(h) ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER; + + return gfp; } static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) @@ -916,13 +951,64 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) return modified_mask; } +static inline bool htlb_allow_alloc_fallback(int reason) +{ + bool allowed_fallback = false; + + /* + * Note: the memory offline, memory failure and migration syscalls will + * be allowed to fallback to other nodes due to lack of a better chioce, + * that might break the per-node hugetlb pool. While other cases will + * set the __GFP_THISNODE to avoid breaking the per-node hugetlb pool. + */ + switch (reason) { + case MR_MEMORY_HOTPLUG: + case MR_MEMORY_FAILURE: + case MR_SYSCALL: + case MR_MEMPOLICY_MBIND: + allowed_fallback = true; + break; + default: + break; + } + + return allowed_fallback; +} + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { - if (huge_page_size(h) == PMD_SIZE) + const unsigned long size = huge_page_size(h); + + VM_WARN_ON(size == PAGE_SIZE); + + /* + * hugetlb must use the exact same PT locks as core-mm page table + * walkers would. When modifying a PTE table, hugetlb must take the + * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD + * PT lock etc. + * + * The expectation is that any hugetlb folio smaller than a PMD is + * always mapped into a single PTE table and that any hugetlb folio + * smaller than a PUD (but at least as big as a PMD) is always mapped + * into a single PMD table. + * + * If that does not hold for an architecture, then that architecture + * must disable split PT locks such that all *_lockptr() functions + * will give us the same result: the per-MM PT lock. + * + * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where + * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr() + * and core-mm would use pmd_lockptr(). However, in such configurations + * split PMD locks are disabled -- they don't make sense on a single + * PGDIR page table -- and the end result is the same. + */ + if (size >= PUD_SIZE) + return pud_lockptr(mm, (pud_t *) pte); + else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE)) return pmd_lockptr(mm, (pmd_t *) pte); - VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); - return &mm->page_table_lock; + /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */ + return ptep_lockptr(mm, pte); } #ifndef hugepages_supported @@ -956,7 +1042,9 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm) static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize); } #endif @@ -966,7 +1054,9 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } #endif @@ -975,42 +1065,69 @@ void hugetlb_register_node(struct node *node); void hugetlb_unregister_node(struct node *node); #endif +/* + * Check if a given raw @page in a hugepage is HWPOISON. + */ +bool is_raw_hwpoison_page_in_hugepage(struct page *page); + +static inline unsigned long huge_page_mask_align(struct file *file) +{ + return PAGE_MASK & ~huge_page_mask(hstate_file(file)); +} + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; +static inline unsigned long huge_page_mask_align(struct file *file) +{ + return 0; +} + static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) { return NULL; } -static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) +static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, + struct address_space *mapping, pgoff_t idx) { return NULL; } -static inline int isolate_or_dissolve_huge_page(struct page *page, +static inline int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list) { return -ENOMEM; } -static inline struct page *alloc_huge_page(struct vm_area_struct *vma, +static inline int replace_free_hugepage_folios(unsigned long start_pfn, + unsigned long end_pfn) +{ + return 0; +} + +static inline void wait_for_freed_hugetlb_folios(void) +{ +} + +static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, - int avoid_reserve) + bool cow_from_owner) { return NULL; } -static inline struct page * -alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, - nodemask_t *nmask, gfp_t gfp_mask) +static inline struct folio * +alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask) { return NULL; } -static inline struct page *alloc_huge_page_vma(struct hstate *h, - struct vm_area_struct *vma, - unsigned long address) +static inline struct folio * +alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask, + bool allow_alloc_fallback) { return NULL; } @@ -1040,11 +1157,6 @@ static inline struct hstate *folio_hstate(struct folio *folio) return NULL; } -static inline struct hstate *page_hstate(struct page *page) -{ - return NULL; -} - static inline struct hstate *size_to_hstate(unsigned long size) { return NULL; @@ -1100,12 +1212,12 @@ static inline int hstate_index(struct hstate *h) return 0; } -static inline int dissolve_free_huge_page(struct page *page) +static inline int dissolve_free_hugetlb_folio(struct folio *folio) { return 0; } -static inline int dissolve_free_huge_pages(unsigned long start_pfn, +static inline int dissolve_free_hugetlb_folios(unsigned long start_pfn, unsigned long end_pfn) { return 0; @@ -1131,6 +1243,11 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) return 0; } +static inline bool htlb_allow_alloc_fallback(int reason) +{ + return false; +} + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { @@ -1152,11 +1269,15 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm) static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { +#ifdef CONFIG_MMU + return ptep_get(ptep); +#else return *ptep; +#endif } static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + pte_t *ptep, pte_t pte, unsigned long sz) { } @@ -1167,6 +1288,21 @@ static inline void hugetlb_register_node(struct node *node) static inline void hugetlb_unregister_node(struct node *node) { } + +static inline bool hugetlbfs_pagecache_present( + struct hstate *h, struct vm_area_struct *vma, unsigned long address) +{ + return false; +} + +static inline void hugetlb_bootmem_alloc(void) +{ +} + +static inline bool hugetlb_bootmem_allocated(void) +{ + return false; +} #endif /* CONFIG_HUGETLB_PAGE */ static inline spinlock_t *huge_pte_lock(struct hstate *h, @@ -1187,6 +1323,18 @@ static inline __init void hugetlb_cma_reserve(int order) } #endif +#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING +static inline bool hugetlb_pmd_shared(pte_t *pte) +{ + return page_count(virt_to_page(pte)) > 1; +} +#else +static inline bool hugetlb_pmd_shared(pte_t *pte) +{ + return false; +} +#endif + bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); #ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE @@ -1197,4 +1345,36 @@ bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); #define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) #endif +static inline bool __vma_shareable_lock(struct vm_area_struct *vma) +{ + return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data; +} + +bool __vma_private_lock(struct vm_area_struct *vma); + +/* + * Safe version of huge_pte_offset() to check the locks. See comments + * above huge_pte_offset(). + */ +static inline pte_t * +hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz) +{ +#if defined(CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING) && defined(CONFIG_LOCKDEP) + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + /* + * If pmd sharing possible, locking needed to safely walk the + * hugetlb pgtables. More information can be found at the comment + * above huge_pte_offset() in the same file. + * + * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP. + */ + if (__vma_shareable_lock(vma)) + WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) && + !lockdep_is_held( + &vma->vm_file->f_mapping->i_mmap_rwsem)); +#endif + return huge_pte_offset(vma->vm_mm, addr, sz); +} + #endif /* _LINUX_HUGETLB_H */ |
