summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/hugetlb.h7
-rw-r--r--include/linux/sysctl.h4
-rw-r--r--mm/hugetlb.c15
-rw-r--r--mm/hugetlb_vmemmap.c143
-rw-r--r--mm/hugetlb_vmemmap.h41
5 files changed, 102 insertions, 108 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 4cdfce976644..6d0620edf0a6 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -638,9 +638,6 @@ struct hstate {
unsigned int nr_huge_pages_node[MAX_NUMNODES];
unsigned int free_huge_pages_node[MAX_NUMNODES];
unsigned int surplus_huge_pages_node[MAX_NUMNODES];
-#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
- unsigned int optimize_vmemmap_pages;
-#endif
#ifdef CONFIG_CGROUP_HUGETLB
/* cgroup control files */
struct cftype cgroup_files_dfl[8];
@@ -716,7 +713,7 @@ static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
return hstate_file(vma->vm_file);
}
-static inline unsigned long huge_page_size(struct hstate *h)
+static inline unsigned long huge_page_size(const struct hstate *h)
{
return (unsigned long)PAGE_SIZE << h->order;
}
@@ -745,7 +742,7 @@ static inline bool hstate_is_gigantic(struct hstate *h)
return huge_page_order(h) >= MAX_ORDER;
}
-static inline unsigned int pages_per_huge_page(struct hstate *h)
+static inline unsigned int pages_per_huge_page(const struct hstate *h)
{
return 1 << h->order;
}
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 17b42ce89d3e..780690dc08cd 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -268,6 +268,10 @@ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table *
return NULL;
}
+static inline void register_sysctl_init(const char *path, struct ctl_table *table)
+{
+}
+
static inline struct ctl_table_header *register_sysctl_mount_point(const char *path)
{
return NULL;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f044962ad9df..598c37279fee 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1535,7 +1535,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return;
- if (hugetlb_vmemmap_alloc(h, page)) {
+ if (hugetlb_vmemmap_restore(h, page)) {
spin_lock_irq(&hugetlb_lock);
/*
* If we cannot allocate vmemmap pages, just refuse to free the
@@ -1612,7 +1612,7 @@ static DECLARE_WORK(free_hpage_work, free_hpage_workfn);
static inline void flush_free_hpage_work(struct hstate *h)
{
- if (hugetlb_optimize_vmemmap_pages(h))
+ if (hugetlb_vmemmap_optimizable(h))
flush_work(&free_hpage_work);
}
@@ -1734,7 +1734,7 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid)
static void __prep_new_huge_page(struct hstate *h, struct page *page)
{
- hugetlb_vmemmap_free(h, page);
+ hugetlb_vmemmap_optimize(h, page);
INIT_LIST_HEAD(&page->lru);
set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
hugetlb_set_page_subpool(page, NULL);
@@ -2107,7 +2107,7 @@ retry:
* Attempt to allocate vmemmmap here so that we can take
* appropriate action on failure.
*/
- rc = hugetlb_vmemmap_alloc(h, head);
+ rc = hugetlb_vmemmap_restore(h, head);
if (!rc) {
/*
* Move PageHWPoison flag from head page to the raw
@@ -3182,8 +3182,10 @@ static void __init report_hugepages(void)
char buf[32];
string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
- pr_info("HugeTLB registered %s page size, pre-allocated %ld pages\n",
+ pr_info("HugeTLB: registered %s page size, pre-allocated %ld pages\n",
buf, h->free_huge_pages);
+ pr_info("HugeTLB: %d KiB vmemmap can be freed for a %s page\n",
+ hugetlb_vmemmap_optimizable_size(h) / SZ_1K, buf);
}
}
@@ -3421,7 +3423,7 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
remove_hugetlb_page_for_demote(h, page, false);
spin_unlock_irq(&hugetlb_lock);
- rc = hugetlb_vmemmap_alloc(h, page);
+ rc = hugetlb_vmemmap_restore(h, page);
if (rc) {
/* Allocation of vmemmmap failed, we can not demote page */
spin_lock_irq(&hugetlb_lock);
@@ -4111,7 +4113,6 @@ void __init hugetlb_add_hstate(unsigned int order)
h->next_nid_to_free = first_memory_node;
snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
huge_page_size(h)/1024);
- hugetlb_vmemmap_init(h);
parsed_hstate = h;
}
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 6c7117c30e56..8da2b31bb59f 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -35,16 +35,6 @@ struct vmemmap_remap_walk {
struct list_head *vmemmap_pages;
};
-/*
- * There are a lot of struct page structures associated with each HugeTLB page.
- * For tail pages, the value of compound_head is the same. So we can reuse first
- * page of head page structures. We map the virtual addresses of all the pages
- * of tail page structures to the head page struct, and then free these page
- * frames. Therefore, we need to reserve one pages as vmemmap areas.
- */
-#define RESERVE_VMEMMAP_NR 1U
-#define RESERVE_VMEMMAP_SIZE (RESERVE_VMEMMAP_NR << PAGE_SHIFT)
-
static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
{
pmd_t __pmd;
@@ -426,32 +416,37 @@ EXPORT_SYMBOL(hugetlb_optimize_vmemmap_key);
static bool vmemmap_optimize_enabled = IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON);
core_param(hugetlb_free_vmemmap, vmemmap_optimize_enabled, bool, 0);
-/*
- * Previously discarded vmemmap pages will be allocated and remapping
- * after this function returns zero.
+/**
+ * hugetlb_vmemmap_restore - restore previously optimized (by
+ * hugetlb_vmemmap_optimize()) vmemmap pages which
+ * will be reallocated and remapped.
+ * @h: struct hstate.
+ * @head: the head page whose vmemmap pages will be restored.
+ *
+ * Return: %0 if @head's vmemmap pages have been reallocated and remapped,
+ * negative error code otherwise.
*/
-int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head)
+int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head)
{
int ret;
- unsigned long vmemmap_addr = (unsigned long)head;
- unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages;
+ unsigned long vmemmap_start = (unsigned long)head, vmemmap_end;
+ unsigned long vmemmap_reuse;
if (!HPageVmemmapOptimized(head))
return 0;
- vmemmap_addr += RESERVE_VMEMMAP_SIZE;
- vmemmap_pages = hugetlb_optimize_vmemmap_pages(h);
- vmemmap_end = vmemmap_addr + (vmemmap_pages << PAGE_SHIFT);
- vmemmap_reuse = vmemmap_addr - PAGE_SIZE;
+ vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h);
+ vmemmap_reuse = vmemmap_start;
+ vmemmap_start += HUGETLB_VMEMMAP_RESERVE_SIZE;
/*
- * The pages which the vmemmap virtual address range [@vmemmap_addr,
+ * The pages which the vmemmap virtual address range [@vmemmap_start,
* @vmemmap_end) are mapped to are freed to the buddy allocator, and
* the range is mapped to the page which @vmemmap_reuse is mapped to.
* When a HugeTLB page is freed to the buddy allocator, previously
* discarded vmemmap pages must be allocated and remapping.
*/
- ret = vmemmap_remap_alloc(vmemmap_addr, vmemmap_end, vmemmap_reuse,
+ ret = vmemmap_remap_alloc(vmemmap_start, vmemmap_end, vmemmap_reuse,
GFP_KERNEL | __GFP_NORETRY | __GFP_THISNODE);
if (!ret) {
ClearHPageVmemmapOptimized(head);
@@ -461,11 +456,14 @@ int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head)
return ret;
}
-static unsigned int vmemmap_optimizable_pages(struct hstate *h,
- struct page *head)
+/* Return true iff a HugeTLB whose vmemmap should and can be optimized. */
+static bool vmemmap_should_optimize(const struct hstate *h, const struct page *head)
{
if (!READ_ONCE(vmemmap_optimize_enabled))
- return 0;
+ return false;
+
+ if (!hugetlb_vmemmap_optimizable(h))
+ return false;
if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) {
pmd_t *pmdp, pmd;
@@ -508,73 +506,47 @@ static unsigned int vmemmap_optimizable_pages(struct hstate *h,
* +-------------------------------------------+
*/
if (PageVmemmapSelfHosted(vmemmap_page))
- return 0;
+ return false;
}
- return hugetlb_optimize_vmemmap_pages(h);
+ return true;
}
-void hugetlb_vmemmap_free(struct hstate *h, struct page *head)
+/**
+ * hugetlb_vmemmap_optimize - optimize @head page's vmemmap pages.
+ * @h: struct hstate.
+ * @head: the head page whose vmemmap pages will be optimized.
+ *
+ * This function only tries to optimize @head's vmemmap pages and does not
+ * guarantee that the optimization will succeed after it returns. The caller
+ * can use HPageVmemmapOptimized(@head) to detect if @head's vmemmap pages
+ * have been optimized.
+ */
+void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head)
{
- unsigned long vmemmap_addr = (unsigned long)head;
- unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages;
+ unsigned long vmemmap_start = (unsigned long)head, vmemmap_end;
+ unsigned long vmemmap_reuse;
- vmemmap_pages = vmemmap_optimizable_pages(h, head);
- if (!vmemmap_pages)
+ if (!vmemmap_should_optimize(h, head))
return;
static_branch_inc(&hugetlb_optimize_vmemmap_key);
- vmemmap_addr += RESERVE_VMEMMAP_SIZE;
- vmemmap_end = vmemmap_addr + (vmemmap_pages << PAGE_SHIFT);
- vmemmap_reuse = vmemmap_addr - PAGE_SIZE;
+ vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h);
+ vmemmap_reuse = vmemmap_start;
+ vmemmap_start += HUGETLB_VMEMMAP_RESERVE_SIZE;
/*
- * Remap the vmemmap virtual address range [@vmemmap_addr, @vmemmap_end)
+ * Remap the vmemmap virtual address range [@vmemmap_start, @vmemmap_end)
* to the page which @vmemmap_reuse is mapped to, then free the pages
- * which the range [@vmemmap_addr, @vmemmap_end] is mapped to.
+ * which the range [@vmemmap_start, @vmemmap_end] is mapped to.
*/
- if (vmemmap_remap_free(vmemmap_addr, vmemmap_end, vmemmap_reuse))
+ if (vmemmap_remap_free(vmemmap_start, vmemmap_end, vmemmap_reuse))
static_branch_dec(&hugetlb_optimize_vmemmap_key);
else
SetHPageVmemmapOptimized(head);
}
-void __init hugetlb_vmemmap_init(struct hstate *h)
-{
- unsigned int nr_pages = pages_per_huge_page(h);
- unsigned int vmemmap_pages;
-
- /*
- * There are only (RESERVE_VMEMMAP_SIZE / sizeof(struct page)) struct
- * page structs that can be used when HVO is enabled, add a BUILD_BUG_ON
- * to catch invalid usage of the tail page structs.
- */
- BUILD_BUG_ON(__NR_USED_SUBPAGE >=
- RESERVE_VMEMMAP_SIZE / sizeof(struct page));
-
- if (!is_power_of_2(sizeof(struct page))) {
- pr_warn_once("cannot optimize vmemmap pages because \"struct page\" crosses page boundaries\n");
- return;
- }
-
- vmemmap_pages = (nr_pages * sizeof(struct page)) >> PAGE_SHIFT;
- /*
- * The head page is not to be freed to buddy allocator, the other tail
- * pages will map to the head page, so they can be freed.
- *
- * Could RESERVE_VMEMMAP_NR be greater than @vmemmap_pages? It is true
- * on some architectures (e.g. aarch64). See Documentation/arm64/
- * hugetlbpage.rst for more details.
- */
- if (likely(vmemmap_pages > RESERVE_VMEMMAP_NR))
- h->optimize_vmemmap_pages = vmemmap_pages - RESERVE_VMEMMAP_NR;
-
- pr_info("can optimize %d vmemmap pages for %s\n",
- h->optimize_vmemmap_pages, h->name);
-}
-
-#ifdef CONFIG_PROC_SYSCTL
static struct ctl_table hugetlb_vmemmap_sysctls[] = {
{
.procname = "hugetlb_optimize_vmemmap",
@@ -586,16 +558,21 @@ static struct ctl_table hugetlb_vmemmap_sysctls[] = {
{ }
};
-static __init int hugetlb_vmemmap_sysctls_init(void)
+static int __init hugetlb_vmemmap_init(void)
{
- /*
- * If "struct page" crosses page boundaries, the vmemmap pages cannot
- * be optimized.
- */
- if (is_power_of_2(sizeof(struct page)))
- register_sysctl_init("vm", hugetlb_vmemmap_sysctls);
-
+ /* HUGETLB_VMEMMAP_RESERVE_SIZE should cover all used struct pages */
+ BUILD_BUG_ON(__NR_USED_SUBPAGE * sizeof(struct page) > HUGETLB_VMEMMAP_RESERVE_SIZE);
+
+ if (IS_ENABLED(CONFIG_PROC_SYSCTL)) {
+ const struct hstate *h;
+
+ for_each_hstate(h) {
+ if (hugetlb_vmemmap_optimizable(h)) {
+ register_sysctl_init("vm", hugetlb_vmemmap_sysctls);
+ break;
+ }
+ }
+ }
return 0;
}
-late_initcall(hugetlb_vmemmap_sysctls_init);
-#endif /* CONFIG_PROC_SYSCTL */
+late_initcall(hugetlb_vmemmap_init);
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index ba66fadad9fc..25bd0e002431 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -11,35 +11,50 @@
#include <linux/hugetlb.h>
#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
-int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head);
-void hugetlb_vmemmap_free(struct hstate *h, struct page *head);
-void hugetlb_vmemmap_init(struct hstate *h);
+int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
+void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
/*
- * How many vmemmap pages associated with a HugeTLB page that can be
- * optimized and freed to the buddy allocator.
+ * Reserve one vmemmap page, all vmemmap addresses are mapped to it. See
+ * Documentation/vm/vmemmap_dedup.rst.
*/
-static inline unsigned int hugetlb_optimize_vmemmap_pages(struct hstate *h)
+#define HUGETLB_VMEMMAP_RESERVE_SIZE PAGE_SIZE
+
+static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
{
- return h->optimize_vmemmap_pages;
+ return pages_per_huge_page(h) * sizeof(struct page);
+}
+
+/*
+ * Return how many vmemmap size associated with a HugeTLB page that can be
+ * optimized and can be freed to the buddy allocator.
+ */
+static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
+{
+ int size = hugetlb_vmemmap_size(h) - HUGETLB_VMEMMAP_RESERVE_SIZE;
+
+ if (!is_power_of_2(sizeof(struct page)))
+ return 0;
+ return size > 0 ? size : 0;
}
#else
-static inline int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head)
+static inline int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head)
{
return 0;
}
-static inline void hugetlb_vmemmap_free(struct hstate *h, struct page *head)
+static inline void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head)
{
}
-static inline void hugetlb_vmemmap_init(struct hstate *h)
+static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
{
+ return 0;
}
+#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */
-static inline unsigned int hugetlb_optimize_vmemmap_pages(struct hstate *h)
+static inline bool hugetlb_vmemmap_optimizable(const struct hstate *h)
{
- return 0;
+ return hugetlb_vmemmap_optimizable_size(h) != 0;
}
-#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */
#endif /* _LINUX_HUGETLB_VMEMMAP_H */