diff options
Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 274 |
1 files changed, 107 insertions, 167 deletions
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 73d4873fc7f8..594a4b7b2ca2 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -19,18 +19,19 @@ #include <linux/swap.h> #include <linux/swapops.h> #include <linux/kmemleak.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/setup.h> #include <asm/hugetlb.h> #include <asm/pte-walk.h> +#include <asm/firmware.h> bool hugetlb_disabled = false; #define hugepd_none(hpd) (hpd_val(hpd) == 0) -#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *))) +#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_basic_t)) - \ + __builtin_ffs(sizeof(void *))) pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { @@ -53,9 +54,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (pshift >= pdshift) { cachep = PGT_CACHE(PTE_T_ORDER); num_hugepd = 1 << (pshift - pdshift); - } else if (IS_ENABLED(CONFIG_PPC_8xx)) { - cachep = PGT_CACHE(PTE_INDEX_SIZE); - num_hugepd = 1; } else { cachep = PGT_CACHE(pdshift - pshift); num_hugepd = 1; @@ -109,9 +107,11 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, * At this point we do the placement change only for BOOK3S 64. This would * possibly work on other subarchs. */ -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, unsigned long sz) { pgd_t *pg; + p4d_t *p4; pud_t *pu; pmd_t *pm; hugepd_t *hpdp = NULL; @@ -121,20 +121,21 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz addr &= ~(sz-1); pg = pgd_offset(mm, addr); + p4 = p4d_offset(pg, addr); #ifdef CONFIG_PPC_BOOK3S_64 if (pshift == PGDIR_SHIFT) /* 16GB huge page */ - return (pte_t *) pg; + return (pte_t *) p4; else if (pshift > PUD_SHIFT) { /* * We need to use hugepd table */ ptl = &mm->page_table_lock; - hpdp = (hugepd_t *)pg; + hpdp = (hugepd_t *)p4; } else { pdshift = PUD_SHIFT; - pu = pud_alloc(mm, pg, addr); + pu = pud_alloc(mm, p4, addr); if (!pu) return NULL; if (pshift == PUD_SHIFT) @@ -159,10 +160,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz #else if (pshift >= PGDIR_SHIFT) { ptl = &mm->page_table_lock; - hpdp = (hugepd_t *)pg; + hpdp = (hugepd_t *)p4; } else { pdshift = PUD_SHIFT; - pu = pud_alloc(mm, pg, addr); + pu = pud_alloc(mm, p4, addr); if (!pu) return NULL; if (pshift >= PUD_SHIFT) { @@ -181,6 +182,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (!hpdp) return NULL; + if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT) + return pte_alloc_huge(mm, (pmd_t *)hpdp, addr); + BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, @@ -215,28 +219,33 @@ void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_p } } -int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) +static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) { struct huge_bootmem_page *m; if (nr_gpages == 0) return 0; m = phys_to_virt(gpage_freearray[--nr_gpages]); gpage_freearray[nr_gpages] = 0; - list_add(&m->list, &huge_boot_pages); + list_add(&m->list, &huge_boot_pages[0]); m->hstate = hstate; return 1; } + +bool __init hugetlb_node_alloc_supported(void) +{ + return false; +} #endif -int __init alloc_bootmem_huge_page(struct hstate *h) +int __init alloc_bootmem_huge_page(struct hstate *h, int nid) { #ifdef CONFIG_PPC_BOOK3S_64 if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled()) return pseries_alloc_bootmem_huge_page(h); #endif - return __alloc_bootmem_huge_page(h); + return __alloc_bootmem_huge_page(h, nid); } #ifndef CONFIG_PPC_BOOK3S_64 @@ -246,7 +255,7 @@ int __init alloc_bootmem_huge_page(struct hstate *h) struct hugepd_freelist { struct rcu_head rcu; unsigned int index; - void *ptes[0]; + void *ptes[]; }; static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur); @@ -292,6 +301,21 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {} #endif +/* Return true when the entry to be freed maps more than the area being freed */ +static bool range_is_outside_limits(unsigned long start, unsigned long end, + unsigned long floor, unsigned long ceiling, + unsigned long mask) +{ + if ((start & mask) < floor) + return true; + if (ceiling) { + ceiling &= mask; + if (!ceiling) + return true; + } + return end - 1 > ceiling - 1; +} + static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, unsigned long start, unsigned long end, unsigned long floor, unsigned long ceiling) @@ -307,15 +331,7 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift > pdshift) num_hugepd = 1 << (shift - pdshift); - start &= pdmask; - if (start < floor) - return; - if (ceiling) { - ceiling &= pdmask; - if (! ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, pdmask)) return; for (i = 0; i < num_hugepd; i++, hpdp++) @@ -323,14 +339,25 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift >= pdshift) hugepd_free(tlb, hugepte); - else if (IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_free_tlb(tlb, hugepte, - get_hugepd_cache_index(PTE_INDEX_SIZE)); else pgtable_free_tlb(tlb, hugepte, get_hugepd_cache_index(pdshift - shift)); } +static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + pgtable_t token = pmd_pgtable(*pmd); + + if (range_is_outside_limits(addr, end, floor, ceiling, PMD_MASK)) + return; + + pmd_clear(pmd); + pte_free_tlb(tlb, token, addr); + mm_dec_nr_ptes(tlb->mm); +} + static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) @@ -346,11 +373,17 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { + if (pmd_none_or_clear_bad(pmd)) + continue; + /* * if it is not hugepd pointer, we should already find * it cleared. */ - WARN_ON(!pmd_none_or_clear_bad(pmd)); + WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx)); + + hugetlb_free_pte_range(tlb, pmd, addr, end, floor, ceiling); + continue; } /* @@ -359,7 +392,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, * single hugepage, but all of them point to * the same kmem cache that holds the hugepte. */ - more = addr + (1 << hugepd_shift(*(hugepd_t *)pmd)); + more = addr + (1UL << hugepd_shift(*(hugepd_t *)pmd)); if (more > next) next = more; @@ -367,24 +400,16 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, addr, next, floor, ceiling); } while (addr = next, addr != end); - start &= PUD_MASK; - if (start < floor) - return; - if (ceiling) { - ceiling &= PUD_MASK; - if (!ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, PUD_MASK)) return; - pmd = pmd_offset(pud, start); + pmd = pmd_offset(pud, start & PUD_MASK); pud_clear(pud); - pmd_free_tlb(tlb, pmd, start); + pmd_free_tlb(tlb, pmd, start & PUD_MASK); mm_dec_nr_pmds(tlb->mm); } -static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, +static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { @@ -394,7 +419,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, start = addr; do { - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); next = pud_addr_end(addr, end); if (!is_hugepd(__hugepd(pud_val(*pud)))) { if (pud_none_or_clear_bad(pud)) @@ -409,7 +434,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, * single hugepage, but all of them point to * the same kmem cache that holds the hugepte. */ - more = addr + (1 << hugepd_shift(*(hugepd_t *)pud)); + more = addr + (1UL << hugepd_shift(*(hugepd_t *)pud)); if (more > next) next = more; @@ -418,20 +443,12 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, } } while (addr = next, addr != end); - start &= PGDIR_MASK; - if (start < floor) - return; - if (ceiling) { - ceiling &= PGDIR_MASK; - if (!ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, PGDIR_MASK)) return; - pud = pud_offset(pgd, start); - pgd_clear(pgd); - pud_free_tlb(tlb, pud, start); + pud = pud_offset(p4d, start & PGDIR_MASK); + p4d_clear(p4d); + pud_free_tlb(tlb, pud, start & PGDIR_MASK); mm_dec_nr_puds(tlb->mm); } @@ -443,6 +460,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long floor, unsigned long ceiling) { pgd_t *pgd; + p4d_t *p4d; unsigned long next; /* @@ -465,10 +483,11 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, do { next = pgd_addr_end(addr, end); pgd = pgd_offset(tlb->mm, addr); + p4d = p4d_offset(pgd, addr); if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { - if (pgd_none_or_clear_bad(pgd)) + if (p4d_none_or_clear_bad(p4d)) continue; - hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); + hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling); } else { unsigned long more; /* @@ -477,82 +496,17 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, * for a single hugepage, but all of them point to the * same kmem cache that holds the hugepte. */ - more = addr + (1 << hugepd_shift(*(hugepd_t *)pgd)); + more = addr + (1UL << hugepd_shift(*(hugepd_t *)pgd)); if (more > next) next = more; - free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, + free_hugepd_range(tlb, (hugepd_t *)p4d, PGDIR_SHIFT, addr, next, floor, ceiling); } } while (addr = next, addr != end); } -struct page *follow_huge_pd(struct vm_area_struct *vma, - unsigned long address, hugepd_t hpd, - int flags, int pdshift) -{ - pte_t *ptep; - spinlock_t *ptl; - struct page *page = NULL; - unsigned long mask; - int shift = hugepd_shift(hpd); - struct mm_struct *mm = vma->vm_mm; - -retry: - /* - * hugepage directory entries are protected by mm->page_table_lock - * Use this instead of huge_pte_lockptr - */ - ptl = &mm->page_table_lock; - spin_lock(ptl); - - ptep = hugepte_offset(hpd, address, pdshift); - if (pte_present(*ptep)) { - mask = (1UL << shift) - 1; - page = pte_page(*ptep); - page += ((address & mask) >> PAGE_SHIFT); - if (flags & FOLL_GET) - get_page(page); - } else { - if (is_hugetlb_entry_migration(*ptep)) { - spin_unlock(ptl); - __migration_entry_wait(mm, ptep, ptl); - goto retry; - } - } - spin_unlock(ptl); - return page; -} - -#ifdef CONFIG_PPC_MM_SLICES -unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags) -{ - struct hstate *hstate = hstate_file(file); - int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); - -#ifdef CONFIG_PPC_RADIX_MMU - if (radix_enabled()) - return radix__hugetlb_get_unmapped_area(file, addr, len, - pgoff, flags); -#endif - return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1); -} -#endif - -unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) -{ - /* With radix we don't use slice, so derive it from vma*/ - if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) { - unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); - - return 1UL << mmu_psize_to_shift(psize); - } - return vma_kernel_pagesize(vma); -} - -static int __init add_huge_page_size(unsigned long long size) +bool __init arch_hugetlb_valid_size(unsigned long size) { int shift = __ffs(size); int mmu_psize; @@ -560,37 +514,27 @@ static int __init add_huge_page_size(unsigned long long size) /* Check that it is a page size supported by the hardware and * that it fits within pagetable and slice limits. */ if (size <= PAGE_SIZE || !is_power_of_2(size)) - return -EINVAL; + return false; mmu_psize = check_and_get_huge_psize(shift); if (mmu_psize < 0) - return -EINVAL; + return false; BUG_ON(mmu_psize_defs[mmu_psize].shift != shift); - /* Return if huge page size has already been setup */ - if (size_to_hstate(size)) - return 0; - - hugetlb_add_hstate(shift - PAGE_SHIFT); - - return 0; + return true; } -static int __init hugepage_setup_sz(char *str) +static int __init add_huge_page_size(unsigned long long size) { - unsigned long long size; - - size = memparse(str, &str); + int shift = __ffs(size); - if (add_huge_page_size(size) != 0) { - hugetlb_bad_size(); - pr_err("Invalid huge page size specified(%llu)\n", size); - } + if (!arch_hugetlb_valid_size((unsigned long)size)) + return -EINVAL; - return 1; + hugetlb_add_hstate(shift - PAGE_SHIFT); + return 0; } -__setup("hugepagesz=", hugepage_setup_sz); static int __init hugetlbpage_init(void) { @@ -639,20 +583,18 @@ static int __init hugetlbpage_init(void) * if we have pdshift and shift value same, we don't * use pgt cache for hugepd. */ - if (pdshift > shift && IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_cache_add(PTE_INDEX_SIZE); - else if (pdshift > shift) - pgtable_cache_add(pdshift - shift); - else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx)) + if (pdshift > shift) { + if (!IS_ENABLED(CONFIG_PPC_8xx)) + pgtable_cache_add(pdshift - shift); + } else if (IS_ENABLED(CONFIG_PPC_E500) || + IS_ENABLED(CONFIG_PPC_8xx)) { pgtable_cache_add(PTE_T_ORDER); + } configured = true; } - if (configured) { - if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) - hugetlbpage_init_default(); - } else + if (!configured) pr_info("Failed to initialize. Disabling HugeTLB"); return 0; @@ -660,20 +602,18 @@ static int __init hugetlbpage_init(void) arch_initcall(hugetlbpage_init); -void flush_dcache_icache_hugepage(struct page *page) +void __init gigantic_hugetlb_cma_reserve(void) { - int i; - void *start; + unsigned long order = 0; - BUG_ON(!PageCompound(page)); + if (radix_enabled()) + order = PUD_SHIFT - PAGE_SHIFT; + else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift) + /* + * For pseries we do use ibm,expected#pages for reserving 16G pages. + */ + order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT; - for (i = 0; i < compound_nr(page); i++) { - if (!PageHighMem(page)) { - __flush_dcache_icache(page_address(page+i)); - } else { - start = kmap_atomic(page+i); - __flush_dcache_icache(start); - kunmap_atomic(start); - } - } + if (order) + hugetlb_cma_reserve(order); } |