summaryrefslogtreecommitdiff
path: root/include/linux/hugetlb.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/hugetlb.h')
-rw-r--r--include/linux/hugetlb.h109
1 files changed, 89 insertions, 20 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 9ab9d3105d5c..7c977d234aba 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_HUGETLB_H
#define _LINUX_HUGETLB_H
+#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/fs.h>
@@ -170,11 +171,11 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
vm_flags_t vm_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
-int isolate_hugetlb(struct page *page, struct list_head *list);
-int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison);
+bool isolate_hugetlb(struct folio *folio, struct list_head *list);
+int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared);
-void putback_active_hugepage(struct page *page);
+void folio_putback_active_hugetlb(struct folio *folio);
void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
void free_huge_page(struct page *page);
void hugetlb_fix_reserve_counts(struct inode *inode);
@@ -193,6 +194,43 @@ extern struct list_head huge_boot_pages;
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, unsigned long sz);
+/*
+ * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE.
+ * Returns the pte_t* if found, or NULL if the address is not mapped.
+ *
+ * IMPORTANT: we should normally not directly call this function, instead
+ * this is only a common interface to implement arch-specific
+ * walker. Please use hugetlb_walk() instead, because that will attempt to
+ * verify the locking for you.
+ *
+ * Since this function will walk all the pgtable pages (including not only
+ * high-level pgtable page, but also PUD entry that can be unshared
+ * concurrently for VM_SHARED), the caller of this function should be
+ * responsible of its thread safety. One can follow this rule:
+ *
+ * (1) For private mappings: pmd unsharing is not possible, so holding the
+ * mmap_lock for either read or write is sufficient. Most callers
+ * already hold the mmap_lock, so normally, no special action is
+ * required.
+ *
+ * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged
+ * pgtable page can go away from under us! It can be done by a pmd
+ * unshare with a follow up munmap() on the other process), then we
+ * need either:
+ *
+ * (2.1) hugetlb vma lock read or write held, to make sure pmd unshare
+ * won't happen upon the range (it also makes sure the pte_t we
+ * read is the right and stable one), or,
+ *
+ * (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make
+ * sure even if unshare happened the racy unmap() will wait until
+ * i_mmap_rwsem is released.
+ *
+ * Option (2.1) is the safest, which guarantees pte stability from pmd
+ * sharing pov, until the vma lock released. Option (2.2) doesn't protect
+ * a concurrent pmd unshare, but it makes sure the pgtable page is safe to
+ * access.
+ */
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
unsigned long hugetlb_mask_last_page(struct hstate *h);
@@ -211,7 +249,7 @@ void hugetlb_vma_lock_release(struct kref *kref);
int pmd_huge(pmd_t pmd);
int pud_huge(pud_t pud);
-unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
+long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end, pgprot_t newprot,
unsigned long cp_flags);
@@ -375,12 +413,12 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
return NULL;
}
-static inline int isolate_hugetlb(struct page *page, struct list_head *list)
+static inline bool isolate_hugetlb(struct folio *folio, struct list_head *list)
{
- return -EBUSY;
+ return false;
}
-static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
+static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison)
{
return 0;
}
@@ -391,7 +429,7 @@ static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
return 0;
}
-static inline void putback_active_hugepage(struct page *page)
+static inline void folio_putback_active_hugetlb(struct folio *folio)
{
}
@@ -400,7 +438,7 @@ static inline void move_hugetlb_state(struct folio *old_folio,
{
}
-static inline unsigned long hugetlb_change_protection(
+static inline long hugetlb_change_protection(
struct vm_area_struct *vma, unsigned long address,
unsigned long end, pgprot_t newprot,
unsigned long cp_flags)
@@ -679,16 +717,16 @@ struct huge_bootmem_page {
};
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
-struct page *alloc_huge_page(struct vm_area_struct *vma,
+struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve);
-struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
+struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask);
-struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
+struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address);
-int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping,
+int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
- unsigned long address, struct page *page);
+ unsigned long address, struct folio *folio);
/* arch callback */
int __init __alloc_bootmem_huge_page(struct hstate *h, int nid);
@@ -843,9 +881,9 @@ extern int dissolve_free_huge_pages(unsigned long start_pfn,
unsigned long end_pfn);
#ifdef CONFIG_MEMORY_FAILURE
-extern void hugetlb_clear_page_hwpoison(struct page *hpage);
+extern void folio_clear_hugetlb_hwpoison(struct folio *folio);
#else
-static inline void hugetlb_clear_page_hwpoison(struct page *hpage)
+static inline void folio_clear_hugetlb_hwpoison(struct folio *folio)
{
}
#endif
@@ -998,21 +1036,21 @@ static inline int isolate_or_dissolve_huge_page(struct page *page,
return -ENOMEM;
}
-static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
+static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
int avoid_reserve)
{
return NULL;
}
-static inline struct page *
-alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
+static inline struct folio *
+alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask)
{
return NULL;
}
-static inline struct page *alloc_huge_page_vma(struct hstate *h,
+static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h,
struct vm_area_struct *vma,
unsigned long address)
{
@@ -1213,4 +1251,35 @@ bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
#endif
+static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
+{
+ return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
+}
+
+/*
+ * Safe version of huge_pte_offset() to check the locks. See comments
+ * above huge_pte_offset().
+ */
+static inline pte_t *
+hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz)
+{
+#if defined(CONFIG_HUGETLB_PAGE) && \
+ defined(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && defined(CONFIG_LOCKDEP)
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ /*
+ * If pmd sharing possible, locking needed to safely walk the
+ * hugetlb pgtables. More information can be found at the comment
+ * above huge_pte_offset() in the same file.
+ *
+ * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP.
+ */
+ if (__vma_shareable_lock(vma))
+ WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) &&
+ !lockdep_is_held(
+ &vma->vm_file->f_mapping->i_mmap_rwsem));
+#endif
+ return huge_pte_offset(vma->vm_mm, addr, sz);
+}
+
#endif /* _LINUX_HUGETLB_H */