From 658013e93eb70494f7300bc90457b09a807232a4 Mon Sep 17 00:00:00 2001 From: Jon Tollefson Date: Wed, 23 Jul 2008 21:27:54 -0700 Subject: powerpc: scan device tree for gigantic pages The 16G huge pages have to be reserved in the HMC prior to boot. The location of the pages are placed in the device tree. This patch adds code to scan the device tree during very early boot and save these page locations until hugetlbfs is ready for them. Acked-by: Adam Litke Signed-off-by: Jon Tollefson Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/hash_utils_64.c | 44 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 8d3b58ebd38e..ae4c717243a5 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -68,6 +68,7 @@ #define KB (1024) #define MB (1024*KB) +#define GB (1024L*MB) /* * Note: pte --> Linux PTE @@ -329,6 +330,44 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, return 0; } +/* Scan for 16G memory blocks that have been set aside for huge pages + * and reserve those blocks for 16G huge pages. + */ +static int __init htab_dt_scan_hugepage_blocks(unsigned long node, + const char *uname, int depth, + void *data) { + char *type = of_get_flat_dt_prop(node, "device_type", NULL); + unsigned long *addr_prop; + u32 *page_count_prop; + unsigned int expected_pages; + long unsigned int phys_addr; + long unsigned int block_size; + + /* We are scanning "memory" nodes only */ + if (type == NULL || strcmp(type, "memory") != 0) + return 0; + + /* This property is the log base 2 of the number of virtual pages that + * will represent this memory block. */ + page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL); + if (page_count_prop == NULL) + return 0; + expected_pages = (1 << page_count_prop[0]); + addr_prop = of_get_flat_dt_prop(node, "reg", NULL); + if (addr_prop == NULL) + return 0; + phys_addr = addr_prop[0]; + block_size = addr_prop[1]; + if (block_size != (16 * GB)) + return 0; + printk(KERN_INFO "Huge page(16GB) memory: " + "addr = 0x%lX size = 0x%lX pages = %d\n", + phys_addr, block_size, expected_pages); + lmb_reserve(phys_addr, block_size * expected_pages); + add_gpage(phys_addr, block_size, expected_pages); + return 0; +} + static void __init htab_init_page_sizes(void) { int rc; @@ -418,7 +457,10 @@ static void __init htab_init_page_sizes(void) ); #ifdef CONFIG_HUGETLB_PAGE - /* Init large page size. Currently, we pick 16M or 1M depending + /* Reserve 16G huge page memory sections for huge pages */ + of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); + +/* Init large page size. Currently, we pick 16M or 1M depending * on what is available */ if (mmu_psize_defs[MMU_PAGE_16M].shift) -- cgit From 0d9ea75443dc7e37843e656b8ebc947a6d16d618 Mon Sep 17 00:00:00 2001 From: Jon Tollefson Date: Wed, 23 Jul 2008 21:27:56 -0700 Subject: powerpc: support multiple hugepage sizes Instead of using the variable mmu_huge_psize to keep track of the huge page size we use an array of MMU_PAGE_* values. For each supported huge page size we need to know the hugepte_shift value and have a pgtable_cache. The hstate or an mmu_huge_psizes index is passed to functions so that they know which huge page size they should use. The hugepage sizes 16M and 64K are setup(if available on the hardware) so that they don't have to be set on the boot cmd line in order to use them. The number of 16G pages have to be specified at boot-time though (e.g. hugepagesz=16G hugepages=5). Signed-off-by: Jon Tollefson Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/hash_utils_64.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index ae4c717243a5..5ce5a4dcd008 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -103,7 +103,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M; int mmu_highuser_ssize = MMU_SEGSIZE_256M; u16 mmu_slb_size = 64; #ifdef CONFIG_HUGETLB_PAGE -int mmu_huge_psize = MMU_PAGE_16M; unsigned int HPAGE_SHIFT; #endif #ifdef CONFIG_PPC_64K_PAGES @@ -460,15 +459,15 @@ static void __init htab_init_page_sizes(void) /* Reserve 16G huge page memory sections for huge pages */ of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); -/* Init large page size. Currently, we pick 16M or 1M depending +/* Set default large page size. Currently, we pick 16M or 1M depending * on what is available */ if (mmu_psize_defs[MMU_PAGE_16M].shift) - set_huge_psize(MMU_PAGE_16M); + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift; /* With 4k/4level pagetables, we can't (for now) cope with a * huge page size < PMD_SIZE */ else if (mmu_psize_defs[MMU_PAGE_1M].shift) - set_huge_psize(MMU_PAGE_1M); + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; #endif /* CONFIG_HUGETLB_PAGE */ } @@ -889,7 +888,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) #ifdef CONFIG_HUGETLB_PAGE /* Handle hugepage regions */ - if (HPAGE_SHIFT && psize == mmu_huge_psize) { + if (HPAGE_SHIFT && mmu_huge_psizes[psize]) { DBG_LOW(" -> huge page !\n"); return hash_huge_page(mm, access, ea, vsid, local, trap); } -- cgit