summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt8
-rw-r--r--arch/arm64/mm/init.c6
-rw-r--r--arch/x86/kernel/setup.c4
-rw-r--r--include/linux/hugetlb.h12
-rw-r--r--mm/hugetlb.c109
5 files changed, 139 insertions, 0 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 86aae1fa099a..d7df9a8302c4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1475,6 +1475,14 @@
hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET
registers. Default set by CONFIG_HPET_MMAP_DEFAULT.
+ hugetlb_cma= [HW] The size of a cma area used for allocation
+ of gigantic hugepages.
+ Format: nn[KMGTPE]
+
+ Reserve a cma area of given size and allocate gigantic
+ hugepages using the cma allocator. If enabled, the
+ boot-time allocation of gigantic hugepages is skipped.
+
hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot.
hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages.
On x86-64 and powerpc, this option can be specified
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index b65dffdfb201..e42727e3568e 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -29,6 +29,7 @@
#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
+#include <linux/hugetlb.h>
#include <asm/boot.h>
#include <asm/fixmap.h>
@@ -457,6 +458,11 @@ void __init arm64_memblock_init(void)
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
dma_contiguous_reserve(arm64_dma32_phys_limit);
+
+#ifdef CONFIG_ARM64_4K_PAGES
+ hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
+#endif
+
}
void __init bootmem_init(void)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index e6b545047f38..4b3fa6cd3106 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -16,6 +16,7 @@
#include <linux/pci.h>
#include <linux/root_dev.h>
#include <linux/sfi.h>
+#include <linux/hugetlb.h>
#include <linux/tboot.h>
#include <linux/usb/xhci-dbgp.h>
@@ -1157,6 +1158,9 @@ void __init setup_arch(char **cmdline_p)
initmem_init();
dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT);
+ if (boot_cpu_has(X86_FEATURE_GBPAGES))
+ hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
+
/*
* Reserve memory for crash kernel after SRAT is parsed so that it
* won't consume hotpluggable memory.
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 5ea05879a0a9..43a1cef8f0f1 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -895,4 +895,16 @@ static inline spinlock_t *huge_pte_lock(struct hstate *h,
return ptl;
}
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
+extern void __init hugetlb_cma_reserve(int order);
+extern void __init hugetlb_cma_check(void);
+#else
+static inline __init void hugetlb_cma_reserve(int order)
+{
+}
+static inline __init void hugetlb_cma_check(void)
+{
+}
+#endif
+
#endif /* _LINUX_HUGETLB_H */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f5fb53fdfa02..cd459155d28a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -28,6 +28,7 @@
#include <linux/jhash.h>
#include <linux/numa.h>
#include <linux/llist.h>
+#include <linux/cma.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -44,6 +45,9 @@
int hugetlb_max_hstate __read_mostly;
unsigned int default_hstate_idx;
struct hstate hstates[HUGE_MAX_HSTATE];
+
+static struct cma *hugetlb_cma[MAX_NUMNODES];
+
/*
* Minimum page order among possible hugepage sizes, set to a proper value
* at boot time.
@@ -1228,6 +1232,14 @@ static void destroy_compound_gigantic_page(struct page *page,
static void free_gigantic_page(struct page *page, unsigned int order)
{
+ /*
+ * If the page isn't allocated using the cma allocator,
+ * cma_release() returns false.
+ */
+ if (IS_ENABLED(CONFIG_CMA) &&
+ cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order))
+ return;
+
free_contig_range(page_to_pfn(page), 1 << order);
}
@@ -1237,6 +1249,21 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
{
unsigned long nr_pages = 1UL << huge_page_order(h);
+ if (IS_ENABLED(CONFIG_CMA)) {
+ struct page *page;
+ int node;
+
+ for_each_node_mask(node, *nodemask) {
+ if (!hugetlb_cma[node])
+ continue;
+
+ page = cma_alloc(hugetlb_cma[node], nr_pages,
+ huge_page_order(h), true);
+ if (page)
+ return page;
+ }
+ }
+
return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
}
@@ -1281,8 +1308,14 @@ static void update_and_free_page(struct hstate *h, struct page *page)
set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
set_page_refcounted(page);
if (hstate_is_gigantic(h)) {
+ /*
+ * Temporarily drop the hugetlb_lock, because
+ * we might block in free_gigantic_page().
+ */
+ spin_unlock(&hugetlb_lock);
destroy_compound_gigantic_page(page, huge_page_order(h));
free_gigantic_page(page, huge_page_order(h));
+ spin_lock(&hugetlb_lock);
} else {
__free_pages(page, huge_page_order(h));
}
@@ -2539,6 +2572,10 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
for (i = 0; i < h->max_huge_pages; ++i) {
if (hstate_is_gigantic(h)) {
+ if (IS_ENABLED(CONFIG_CMA) && hugetlb_cma[0]) {
+ pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n");
+ break;
+ }
if (!alloc_bootmem_huge_page(h))
break;
} else if (!alloc_pool_huge_page(h,
@@ -3194,6 +3231,7 @@ static int __init hugetlb_init(void)
default_hstate.max_huge_pages = default_hstate_max_huge_pages;
}
+ hugetlb_cma_check();
hugetlb_init_hstates();
gather_bootmem_prealloc();
report_hugepages();
@@ -5506,3 +5544,74 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
spin_unlock(&hugetlb_lock);
}
}
+
+#ifdef CONFIG_CMA
+static unsigned long hugetlb_cma_size __initdata;
+static bool cma_reserve_called __initdata;
+
+static int __init cmdline_parse_hugetlb_cma(char *p)
+{
+ hugetlb_cma_size = memparse(p, &p);
+ return 0;
+}
+
+early_param("hugetlb_cma", cmdline_parse_hugetlb_cma);
+
+void __init hugetlb_cma_reserve(int order)
+{
+ unsigned long size, reserved, per_node;
+ int nid;
+
+ cma_reserve_called = true;
+
+ if (!hugetlb_cma_size)
+ return;
+
+ if (hugetlb_cma_size < (PAGE_SIZE << order)) {
+ pr_warn("hugetlb_cma: cma area should be at least %lu MiB\n",
+ (PAGE_SIZE << order) / SZ_1M);
+ return;
+ }
+
+ /*
+ * If 3 GB area is requested on a machine with 4 numa nodes,
+ * let's allocate 1 GB on first three nodes and ignore the last one.
+ */
+ per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes);
+ pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n",
+ hugetlb_cma_size / SZ_1M, per_node / SZ_1M);
+
+ reserved = 0;
+ for_each_node_state(nid, N_ONLINE) {
+ int res;
+
+ size = min(per_node, hugetlb_cma_size - reserved);
+ size = round_up(size, PAGE_SIZE << order);
+
+ res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order,
+ 0, false, "hugetlb",
+ &hugetlb_cma[nid], nid);
+ if (res) {
+ pr_warn("hugetlb_cma: reservation failed: err %d, node %d",
+ res, nid);
+ continue;
+ }
+
+ reserved += size;
+ pr_info("hugetlb_cma: reserved %lu MiB on node %d\n",
+ size / SZ_1M, nid);
+
+ if (reserved >= hugetlb_cma_size)
+ break;
+ }
+}
+
+void __init hugetlb_cma_check(void)
+{
+ if (!hugetlb_cma_size || cma_reserve_called)
+ return;
+
+ pr_warn("hugetlb_cma: the option isn't supported by current arch\n");
+}
+
+#endif /* CONFIG_CMA */