From 69111bac42f5ceacdd22e30947837ceb2c4493ed Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 21 Oct 2014 15:23:25 -0500 Subject: powerpc: Replace __get_cpu_var uses This still has not been merged and now powerpc is the only arch that does not have this change. Sorry about missing linuxppc-dev before. V2->V2 - Fix up to work against 3.18-rc1 __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) Cc: Benjamin Herrenschmidt CC: Paul Mackerras Signed-off-by: Christoph Lameter [mpe: Fix build errors caused by set/or_softirq_pending(), and rework assignment in __set_breakpoint() to use memcpy().] Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_native_64.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/hugetlbpage-book3e.c | 6 +++--- arch/powerpc/mm/hugetlbpage.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index ae4962a06476..d53288a08c37 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -629,7 +629,7 @@ static void native_flush_hash_range(unsigned long number, int local) unsigned long want_v; unsigned long flags; real_pte_t pte; - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); unsigned long psize = batch->psize; int ssize = batch->ssize; int i; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index d5339a3b9945..f01027731e23 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1322,7 +1322,7 @@ void flush_hash_range(unsigned long number, int local) else { int i; struct ppc64_tlb_batch *batch = - &__get_cpu_var(ppc64_tlb_batch); + this_cpu_ptr(&ppc64_tlb_batch); for (i = 0; i < number; i++) flush_hash_page(batch->vpn[i], batch->pte[i], diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index 5e4ee2573903..ba47aaf33a4b 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -33,13 +33,13 @@ static inline int tlb1_next(void) ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - index = __get_cpu_var(next_tlbcam_idx); + index = this_cpu_read(next_tlbcam_idx); /* Just round-robin the entries and wrap when we hit the end */ if (unlikely(index == ncams - 1)) - __get_cpu_var(next_tlbcam_idx) = tlbcam_index; + __this_cpu_write(next_tlbcam_idx, tlbcam_index); else - __get_cpu_var(next_tlbcam_idx)++; + __this_cpu_inc(next_tlbcam_idx); return index; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7e70ae968e5f..8aa04f03fd31 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -462,7 +462,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) { struct hugepd_freelist **batchp; - batchp = &get_cpu_var(hugepd_freelist_cur); + batchp = this_cpu_ptr(&hugepd_freelist_cur); if (atomic_read(&tlb->mm->mm_users) < 2 || cpumask_equal(mm_cpumask(tlb->mm), -- cgit From 16d0f5c4af76b0c3424290937bf1ac22adf439b1 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 14 Oct 2014 22:17:47 +1100 Subject: powerpc: Remove ppc_md.remove_memory We have an extra level of indirection on memory hot remove which is not matched on memory hot add. Memory hotplug is book3s only, so there is no need for it. This also enables means remove_memory() (ie memory hot unplug) works on powernv. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/mm/mem.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8ebaac75c940..2add0b7b9f6d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -144,8 +145,17 @@ int arch_remove_memory(u64 start, u64 size) zone = page_zone(pfn_to_page(start_pfn)); ret = __remove_pages(zone, start_pfn, nr_pages); - if (!ret && (ppc_md.remove_memory)) - ret = ppc_md.remove_memory(start, size); + if (ret) + return ret; + + /* Remove htab bolted mappings for this section of memory */ + start = (unsigned long)__va(start); + ret = remove_section_mapping(start, start + size); + + /* Ensure all vmalloc mappings are flushed in case they also + * hit that section of memory + */ + vm_unmap_aliases(); return ret; } -- cgit From c51a6821bdbc9068adda93b3b8ee65df8e4642a6 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:10 +0200 Subject: powerpc/8xx: Invalidate non present TLB as early as possible 8xx sometimes need to load a invalid/non-present TLBs in it DTLB asm handler. These must be invalidated separaly as linux mm doesn't. Commit 5efab4a02c89c252fb4cce097aafde5f8208dbfe was invalidating them in arch/powerpc/mm/fault.c. This patch does the invalidation earlier in order to free the TLB as soon as possible. This also has the advantage of removing some 8xx specific code from fault.c Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/mm/fault.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 08d659a9fcdb..eb79907f34fa 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -43,7 +43,6 @@ #include #include #include -#include #include "icswx.h" @@ -380,12 +379,6 @@ good_area: goto bad_area; #endif /* CONFIG_6xx */ #if defined(CONFIG_8xx) - /* 8xx sometimes need to load a invalid/non-present TLBs. - * These must be invalidated separately as linux mm don't. - */ - if (error_code & 0x40000000) /* no translation? */ - _tlbil_va(address, 0, 0, 0); - /* The MPC8xx seems to always set 0x80000000, which is * "undefined". Of those that can be set, this is the only * one which seems bad. -- cgit From 10239733ee8617bac3f1c1769af43a88ed979324 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:33 +1000 Subject: powerpc: Remove bootmem allocator At the moment we transition from the memblock alloctor to the bootmem allocator. Gitting rid of the bootmem allocator removes a bunch of complicated code (most of which I owe the dubious honour of being responsible for writing). Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman --- arch/powerpc/mm/init_32.c | 9 -- arch/powerpc/mm/mem.c | 62 +----------- arch/powerpc/mm/numa.c | 224 +++++++------------------------------------ arch/powerpc/mm/pgtable_32.c | 3 +- arch/powerpc/mm/pgtable_64.c | 6 +- 5 files changed, 38 insertions(+), 266 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 415a51b028b9..3b3100433c18 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -195,15 +195,6 @@ void __init MMU_init(void) memblock_set_current_limit(lowmem_end_addr); } -/* This is only called until mem_init is done. */ -void __init *early_get_page(void) -{ - if (init_bootmem_done) - return alloc_bootmem_pages(PAGE_SIZE); - else - return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); -} - #ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */ void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2add0b7b9f6d..e076d19c095b 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -61,7 +61,6 @@ #define CPU_FTR_NOEXECUTE 0 #endif -int init_bootmem_done; int mem_init_done; unsigned long long memory_limit; @@ -190,70 +189,22 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, } EXPORT_SYMBOL_GPL(walk_system_ram_range); -/* - * Initialize the bootmem system and give it all the memory we - * have available. If we are using highmem, we only put the - * lowmem into the bootmem system. - */ #ifndef CONFIG_NEED_MULTIPLE_NODES -void __init do_init_bootmem(void) +void __init initmem_init(void) { - unsigned long start, bootmap_pages; - unsigned long total_pages; - struct memblock_region *reg; - int boot_mapsize; - max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; - total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT; + min_low_pfn = MEMORY_START >> PAGE_SHIFT; #ifdef CONFIG_HIGHMEM - total_pages = total_lowmem >> PAGE_SHIFT; max_low_pfn = lowmem_end_addr >> PAGE_SHIFT; #endif - /* - * Find an area to use for the bootmem bitmap. Calculate the size of - * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE. - * Add 1 additional page in case the address isn't page-aligned. - */ - bootmap_pages = bootmem_bootmap_pages(total_pages); - - start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); - - min_low_pfn = MEMORY_START >> PAGE_SHIFT; - boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn); - /* Place all memblock_regions in the same node and merge contiguous * memblock_regions */ memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); - /* Add all physical memory to the bootmem map, mark each area - * present. - */ -#ifdef CONFIG_HIGHMEM - free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT); - - /* reserve the sections we're already using */ - for_each_memblock(reserved, reg) { - unsigned long top = reg->base + reg->size - 1; - if (top < lowmem_end_addr) - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); - else if (reg->base < lowmem_end_addr) { - unsigned long trunc_size = lowmem_end_addr - reg->base; - reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT); - } - } -#else - free_bootmem_with_active_regions(0, max_pfn); - - /* reserve the sections we're already using */ - for_each_memblock(reserved, reg) - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); -#endif /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); - - init_bootmem_done = 1; } /* mark pages that don't exist as nosave */ @@ -369,14 +320,6 @@ void __init paging_init(void) mark_nonram_nosave(); } -static void __init register_page_bootmem_info(void) -{ - int i; - - for_each_online_node(i) - register_page_bootmem_info_node(NODE_DATA(i)); -} - void __init mem_init(void) { /* @@ -389,7 +332,6 @@ void __init mem_init(void) swiotlb_init(0); #endif - register_page_bootmem_info(); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); set_max_mapnr(max_pfn); free_all_bootmem(); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b9d1dfdbe5bb..2e9ad2d76b75 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -134,28 +134,6 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn, return 0; } -/* - * get_node_active_region - Return active region containing pfn - * Active range returned is empty if none found. - * @pfn: The page to return the region for - * @node_ar: Returned set to the active region containing @pfn - */ -static void __init get_node_active_region(unsigned long pfn, - struct node_active_region *node_ar) -{ - unsigned long start_pfn, end_pfn; - int i, nid; - - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { - if (pfn >= start_pfn && pfn < end_pfn) { - node_ar->nid = nid; - node_ar->start_pfn = start_pfn; - node_ar->end_pfn = end_pfn; - break; - } - } -} - static void reset_numa_cpu_lookup_table(void) { unsigned int cpu; @@ -928,134 +906,48 @@ static void __init dump_numa_memory_topology(void) } } -/* - * Allocate some memory, satisfying the memblock or bootmem allocator where - * required. nid is the preferred node and end is the physical address of - * the highest address in the node. - * - * Returns the virtual address of the memory. - */ -static void __init *careful_zallocation(int nid, unsigned long size, - unsigned long align, - unsigned long end_pfn) -{ - void *ret; - int new_nid; - unsigned long ret_paddr; - - ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT); - - /* retry over all memory */ - if (!ret_paddr) - ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM()); - - if (!ret_paddr) - panic("numa.c: cannot allocate %lu bytes for node %d", - size, nid); - - ret = __va(ret_paddr); - - /* - * We initialize the nodes in numeric order: 0, 1, 2... - * and hand over control from the MEMBLOCK allocator to the - * bootmem allocator. If this function is called for - * node 5, then we know that all nodes <5 are using the - * bootmem allocator instead of the MEMBLOCK allocator. - * - * So, check the nid from which this allocation came - * and double check to see if we need to use bootmem - * instead of the MEMBLOCK. We don't free the MEMBLOCK memory - * since it would be useless. - */ - new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT); - if (new_nid < nid) { - ret = __alloc_bootmem_node(NODE_DATA(new_nid), - size, align, 0); - - dbg("alloc_bootmem %p %lx\n", ret, size); - } - - memset(ret, 0, size); - return ret; -} - static struct notifier_block ppc64_numa_nb = { .notifier_call = cpu_numa_callback, .priority = 1 /* Must run before sched domains notifier. */ }; -static void __init mark_reserved_regions_for_nid(int nid) +/* Initialize NODE_DATA for a node on the local memory */ +static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) { - struct pglist_data *node = NODE_DATA(nid); - struct memblock_region *reg; - - for_each_memblock(reserved, reg) { - unsigned long physbase = reg->base; - unsigned long size = reg->size; - unsigned long start_pfn = physbase >> PAGE_SHIFT; - unsigned long end_pfn = PFN_UP(physbase + size); - struct node_active_region node_ar; - unsigned long node_end_pfn = pgdat_end_pfn(node); - - /* - * Check to make sure that this memblock.reserved area is - * within the bounds of the node that we care about. - * Checking the nid of the start and end points is not - * sufficient because the reserved area could span the - * entire node. - */ - if (end_pfn <= node->node_start_pfn || - start_pfn >= node_end_pfn) - continue; - - get_node_active_region(start_pfn, &node_ar); - while (start_pfn < end_pfn && - node_ar.start_pfn < node_ar.end_pfn) { - unsigned long reserve_size = size; - /* - * if reserved region extends past active region - * then trim size to active region - */ - if (end_pfn > node_ar.end_pfn) - reserve_size = (node_ar.end_pfn << PAGE_SHIFT) - - physbase; - /* - * Only worry about *this* node, others may not - * yet have valid NODE_DATA(). - */ - if (node_ar.nid == nid) { - dbg("reserve_bootmem %lx %lx nid=%d\n", - physbase, reserve_size, node_ar.nid); - reserve_bootmem_node(NODE_DATA(node_ar.nid), - physbase, reserve_size, - BOOTMEM_DEFAULT); - } - /* - * if reserved region is contained in the active region - * then done. - */ - if (end_pfn <= node_ar.end_pfn) - break; - - /* - * reserved region extends past the active region - * get next active region that contains this - * reserved region - */ - start_pfn = node_ar.end_pfn; - physbase = start_pfn << PAGE_SHIFT; - size = size - reserve_size; - get_node_active_region(start_pfn, &node_ar); - } - } + u64 spanned_pages = end_pfn - start_pfn; + const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); + u64 nd_pa; + void *nd; + int tnid; + + if (spanned_pages) + pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", + nid, start_pfn << PAGE_SHIFT, + (end_pfn << PAGE_SHIFT) - 1); + else + pr_info("Initmem setup node %d\n", nid); + + nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); + nd = __va(nd_pa); + + /* report and initialize */ + pr_info(" NODE_DATA [mem %#010Lx-%#010Lx]\n", + nd_pa, nd_pa + nd_size - 1); + tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); + if (tnid != nid) + pr_info(" NODE_DATA(%d) on node %d\n", nid, tnid); + + node_data[nid] = nd; + memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); + NODE_DATA(nid)->node_id = nid; + NODE_DATA(nid)->node_start_pfn = start_pfn; + NODE_DATA(nid)->node_spanned_pages = spanned_pages; } - -void __init do_init_bootmem(void) +void __init initmem_init(void) { int nid, cpu; - min_low_pfn = 0; max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; max_pfn = max_low_pfn; @@ -1064,64 +956,16 @@ void __init do_init_bootmem(void) else dump_numa_memory_topology(); + memblock_dump_all(); + for_each_online_node(nid) { unsigned long start_pfn, end_pfn; - void *bootmem_vaddr; - unsigned long bootmap_pages; get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); - - /* - * Allocate the node structure node local if possible - * - * Be careful moving this around, as it relies on all - * previous nodes' bootmem to be initialized and have - * all reserved areas marked. - */ - NODE_DATA(nid) = careful_zallocation(nid, - sizeof(struct pglist_data), - SMP_CACHE_BYTES, end_pfn); - - dbg("node %d\n", nid); - dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); - - NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; - NODE_DATA(nid)->node_start_pfn = start_pfn; - NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; - - if (NODE_DATA(nid)->node_spanned_pages == 0) - continue; - - dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT); - dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT); - - bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmem_vaddr = careful_zallocation(nid, - bootmap_pages << PAGE_SHIFT, - PAGE_SIZE, end_pfn); - - dbg("bootmap_vaddr = %p\n", bootmem_vaddr); - - init_bootmem_node(NODE_DATA(nid), - __pa(bootmem_vaddr) >> PAGE_SHIFT, - start_pfn, end_pfn); - - free_bootmem_with_active_regions(nid, end_pfn); - /* - * Be very careful about moving this around. Future - * calls to careful_zallocation() depend on this getting - * done correctly. - */ - mark_reserved_regions_for_nid(nid); + setup_node_data(nid, start_pfn, end_pfn); sparse_memory_present_with_active_regions(nid); } - init_bootmem_done = 1; - - /* - * Now bootmem is initialised we can create the node to cpumask - * lookup tables and setup the cpu callback to populate them. - */ setup_node_to_cpumask_map(); reset_numa_cpu_lookup_table(); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index cf11342bf519..d545b1231594 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -100,12 +100,11 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add { pte_t *pte; extern int mem_init_done; - extern void *early_get_page(void); if (mem_init_done) { pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); } else { - pte = (pte_t *)early_get_page(); + pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); if (pte) clear_page(pte); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c8d709ab489d..cdb19ab859d3 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -75,11 +75,7 @@ static __ref void *early_alloc_pgtable(unsigned long size) { void *pt; - if (init_bootmem_done) - pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS)); - else - pt = __va(memblock_alloc_base(size, size, - __pa(MAX_DMA_ADDRESS))); + pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS))); memset(pt, 0, size); return pt; -- cgit From 14ed740957704e8768523899e0fa31972577bf65 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:34 +1000 Subject: powerpc: Remove some old bootmem related comments Now bootmem is gone from powerpc we can remove comments mentioning it. Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hugetlbpage.c | 4 ++-- arch/powerpc/mm/pgtable_64.c | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8aa04f03fd31..b460e723f0ec 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -276,7 +276,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz #ifdef CONFIG_PPC_FSL_BOOK3E /* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy or bootmem allocator is setup. + * boot before the buddy allocator is setup. */ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { @@ -399,7 +399,7 @@ void __init reserve_hugetlb_gpages(void) #else /* !PPC_FSL_BOOK3E */ /* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy or bootmem allocator is setup. + * boot before the buddy allocator is setup. */ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index cdb19ab859d3..aa9173745cf4 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -109,10 +109,6 @@ int map_kernel_page(unsigned long ea, unsigned long pa, int flags) __pgprot(flags))); } else { #ifdef CONFIG_PPC_MMU_NOHASH - /* Warning ! This will blow up if bootmem is not initialized - * which our ppc64 code is keen to do that, we'll need to - * fix it and/or be more careful - */ pgdp = pgd_offset_k(ea); #ifdef PUD_TABLE_SIZE if (pgd_none(*pgdp)) { -- cgit From 68cf0d642f62267b960f947370539ff3582c4935 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:35 +1000 Subject: powerpc: Remove superfluous bootmem includes Lots of places included bootmem.h even when not using bootmem. Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman --- arch/powerpc/mm/init_32.c | 1 - arch/powerpc/mm/init_64.c | 1 - arch/powerpc/mm/pgtable_64.c | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 3b3100433c18..a10be665b645 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 3481556a1880..10471f9bb63f 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index aa9173745cf4..e0c718543174 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include @@ -51,6 +50,7 @@ #include #include #include +#include #include "mmu_decl.h" -- cgit From 21098b9e07476deb3f40acd7e51cffbffb4ef865 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:36 +1000 Subject: powerpc: Move sparse_init() into initmem_init We did part of sparse initialisation in setup_arch and part in initmem_init. Put them together. Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman --- arch/powerpc/mm/mem.c | 1 + arch/powerpc/mm/numa.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e076d19c095b..b7285a5870f8 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -205,6 +205,7 @@ void __init initmem_init(void) /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); + sparse_init(); } /* mark pages that don't exist as nosave */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 2e9ad2d76b75..417b0a523a47 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -966,6 +966,8 @@ void __init initmem_init(void) sparse_memory_present_with_active_regions(nid); } + sparse_init(); + setup_node_to_cpumask_map(); reset_numa_cpu_lookup_table(); -- cgit From d1d5304fcbc519f5fb12cbcca9c4fdd1d94447d7 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:29:28 +1100 Subject: powerpc/mm: Use PAGE_FACTOR PAGE_FACTOR was defined to reflect the difference between configured page size and fixed 4KB page size. Replace (PAGE_SHIFT - HW_PAGE_SHIFT) with PAGE_FACTOR. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_low_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 057cbbb4c576..5094f32b706e 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -514,7 +514,7 @@ htab_insert_pte: andis. r0,r31,_PAGE_4K_PFN@h srdi r5,r31,PTE_RPN_SHIFT bne- htab_special_pfn - sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT + sldi r5,r5,PAGE_FACTOR add r5,r5,r25 htab_special_pfn: sldi r5,r5,HW_PAGE_SHIFT @@ -544,7 +544,7 @@ htab_call_hpte_insert1: andis. r0,r31,_PAGE_4K_PFN@h srdi r5,r31,PTE_RPN_SHIFT bne- 3f - sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT + sldi r5,r5,PAGE_FACTOR add r5,r5,r25 3: sldi r5,r5,HW_PAGE_SHIFT -- cgit From 06743521d0eae1263a09bccb1a92a9fbb94660b3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 5 Nov 2014 21:57:39 +0530 Subject: powerpc/mm: Add missing pmd accessors This patch add documentation and missing accessors. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hugetlbpage.c | 3 +++ arch/powerpc/mm/pgtable_64.c | 22 +++++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index b460e723f0ec..2b8e5ed28831 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -62,6 +62,9 @@ static unsigned nr_gpages; /* * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; + * + * Defined in such a way that we can optimize away code block at build time + * if CONFIG_HUGETLB_PAGE=n. */ int pmd_huge(pmd_t pmd) { diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e0c718543174..87ff0c1908a9 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -344,16 +345,31 @@ EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(__iounmap); EXPORT_SYMBOL(__iounmap_at); +#ifndef __PAGETABLE_PUD_FOLDED +/* 4 level page table */ +struct page *pgd_page(pgd_t pgd) +{ + if (pgd_huge(pgd)) + return pte_page(pgd_pte(pgd)); + return virt_to_page(pgd_page_vaddr(pgd)); +} +#endif + +struct page *pud_page(pud_t pud) +{ + if (pud_huge(pud)) + return pte_page(pud_pte(pud)); + return virt_to_page(pud_page_vaddr(pud)); +} + /* * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address. */ struct page *pmd_page(pmd_t pmd) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pmd_trans_huge(pmd)) + if (pmd_trans_huge(pmd) || pmd_huge(pmd)) return pfn_to_page(pmd_pfn(pmd)); -#endif return virt_to_page(pmd_page_vaddr(pmd)); } -- cgit From b30e759072c182538abb6908681cfd49978ba5e2 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 5 Nov 2014 21:57:41 +0530 Subject: powerpc/mm: Switch to generic RCU get_user_pages_fast This patch switch the ppc arch to use the generic RCU based gup implementation. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/gup.c | 235 ------------------------------------------ arch/powerpc/mm/hugetlbpage.c | 33 +++--- 3 files changed, 13 insertions(+), 257 deletions(-) delete mode 100644 arch/powerpc/mm/gup.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 325e861616a1..438dcd3fd0d1 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o gup.o mmap.o \ +obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c deleted file mode 100644 index d8746684f606..000000000000 --- a/arch/powerpc/mm/gup.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Lockless get_user_pages_fast for powerpc - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - */ -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include - -#ifdef __HAVE_ARCH_PTE_SPECIAL - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long mask, result; - pte_t *ptep; - - result = _PAGE_PRESENT|_PAGE_USER; - if (write) - result |= _PAGE_RW; - mask = result | _PAGE_SPECIAL; - - ptep = pte_offset_kernel(&pmd, addr); - do { - pte_t pte = ACCESS_ONCE(*ptep); - struct page *page; - /* - * Similar to the PMD case, NUMA hinting must take slow path - */ - if (pte_numa(pte)) - return 0; - - if ((pte_val(pte) & mask) != result) - return 0; - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - if (!page_cache_get_speculative(page)) - return 0; - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_page(page); - return 0; - } - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = ACCESS_ONCE(*pmdp); - - next = pmd_addr_end(addr, end); - /* - * If we find a splitting transparent hugepage we - * return zero. That will result in taking the slow - * path which will call wait_split_huge_page() - * if the pmd is still in splitting state - */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) - return 0; - if (pmd_huge(pmd) || pmd_large(pmd)) { - /* - * NUMA hinting faults need to be handled in the GUP - * slowpath for accounting purposes and so that they - * can be serialised against THP migration. - */ - if (pmd_numa(pmd)) - return 0; - - if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, - write, pages, nr)) - return 0; - } else if (is_hugepd(pmdp)) { - if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, - addr, next, write, pages, nr)) - return 0; - } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) - return 0; - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = ACCESS_ONCE(*pudp); - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (pud_huge(pud)) { - if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next, - write, pages, nr)) - return 0; - } else if (is_hugepd(pudp)) { - if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT, - addr, next, write, pages, nr)) - return 0; - } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) - return 0; - } while (pudp++, addr = next, addr != end); - - return 1; -} - -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - unsigned long flags; - pgd_t *pgdp; - int nr = 0; - - pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - - if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - start, len))) - return 0; - - pr_devel(" aligned: %lx .. %lx\n", start, end); - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch size - * will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables from being freed on powerpc. - * - * So long as we atomically load page table pointers versus teardown, - * we can follow the address down to the the page and take a ref on it. - */ - local_irq_save(flags); - - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = ACCESS_ONCE(*pgdp); - - pr_devel(" %016lx: normal pgd %p\n", addr, - (void *)pgd_val(pgd)); - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (pgd_huge(pgd)) { - if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next, - write, pages, &nr)) - break; - } else if (is_hugepd(pgdp)) { - if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT, - addr, next, write, pages, &nr)) - break; - } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - - local_irq_restore(flags); - - return nr; -} - -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - int nr, ret; - - start &= PAGE_MASK; - nr = __get_user_pages_fast(start, nr_pages, write, pages); - ret = nr; - - if (nr < nr_pages) { - pr_devel(" slow path ! nr = %d\n", nr); - - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - nr_pages - nr, write, 0, pages, NULL); - up_read(&mm->mmap_sem); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - } - - return ret; -} - -#endif /* __HAVE_ARCH_PTE_SPECIAL */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 2b8e5ed28831..af56de82375d 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -233,7 +233,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) return NULL; - return hugepte_offset(hpdp, addr, pdshift); + return hugepte_offset(*hpdp, addr, pdshift); } #else @@ -273,7 +273,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) return NULL; - return hugepte_offset(hpdp, addr, pdshift); + return hugepte_offset(*hpdp, addr, pdshift); } #endif @@ -541,7 +541,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, do { pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); - if (!is_hugepd(pmd)) { + if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { /* * if it is not hugepd pointer, we should already find * it cleared. @@ -590,7 +590,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, do { pud = pud_offset(pgd, addr); next = pud_addr_end(addr, end); - if (!is_hugepd(pud)) { + if (!is_hugepd(__hugepd(pud_val(*pud)))) { if (pud_none_or_clear_bad(pud)) continue; hugetlb_free_pmd_range(tlb, pud, addr, next, floor, @@ -656,7 +656,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, do { next = pgd_addr_end(addr, end); pgd = pgd_offset(tlb->mm, addr); - if (!is_hugepd(pgd)) { + if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { if (pgd_none_or_clear_bad(pgd)) continue; hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); @@ -716,12 +716,11 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, return (__boundary - 1 < end - 1) ? __boundary : end; } -int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, - unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) +int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift, + unsigned long end, int write, struct page **pages, int *nr) { pte_t *ptep; - unsigned long sz = 1UL << hugepd_shift(*hugepd); + unsigned long sz = 1UL << hugepd_shift(hugepd); unsigned long next; ptep = hugepte_offset(hugepd, addr, pdshift); @@ -964,7 +963,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift else if (pgd_huge(pgd)) { ret_pte = (pte_t *) pgdp; goto out; - } else if (is_hugepd(&pgd)) + } else if (is_hugepd(__hugepd(pgd_val(pgd)))) hpdp = (hugepd_t *)&pgd; else { /* @@ -981,7 +980,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift else if (pud_huge(pud)) { ret_pte = (pte_t *) pudp; goto out; - } else if (is_hugepd(&pud)) + } else if (is_hugepd(__hugepd(pud_val(pud)))) hpdp = (hugepd_t *)&pud; else { pdshift = PMD_SHIFT; @@ -1002,7 +1001,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (pmd_huge(pmd) || pmd_large(pmd)) { ret_pte = (pte_t *) pmdp; goto out; - } else if (is_hugepd(&pmd)) + } else if (is_hugepd(__hugepd(pmd_val(pmd)))) hpdp = (hugepd_t *)&pmd; else return pte_offset_kernel(&pmd, ea); @@ -1011,7 +1010,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (!hpdp) return NULL; - ret_pte = hugepte_offset(hpdp, ea, pdshift); + ret_pte = hugepte_offset(*hpdp, ea, pdshift); pdshift = hugepd_shift(*hpdp); out: if (shift) @@ -1041,14 +1040,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, if ((pte_val(pte) & mask) != mask) return 0; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - /* - * check for splitting here - */ - if (pmd_trans_splitting(pte_pmd(pte))) - return 0; -#endif - /* hugepages are never "special" */ VM_BUG_ON(!pfn_valid(pte_pfn(pte))); -- cgit From e39f223fc93580c86ccf6b3422033e349f57f0dd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 18 Nov 2014 16:47:35 +1100 Subject: powerpc: Remove more traces of bootmem Although we are now selecting NO_BOOTMEM, we still have some traces of bootmem lying around. That is because even with NO_BOOTMEM there is still a shim that converts bootmem calls into memblock calls, but ultimately we want to remove all traces of bootmem. Most of the patch is conversions from alloc_bootmem() to memblock_virt_alloc(). In general a call such as: p = (struct foo *)alloc_bootmem(x); Becomes: p = memblock_virt_alloc(x, 0); We don't need the cast because memblock_virt_alloc() returns a void *. The alignment value of zero tells memblock to use the default alignment, which is SMP_CACHE_BYTES, the same value alloc_bootmem() uses. We remove a number of NULL checks on the result of memblock_virt_alloc(). That is because memblock_virt_alloc() will panic if it can't allocate, in exactly the same way as alloc_bootmem(), so the NULL checks are and always have been redundant. The memory returned by memblock_virt_alloc() is already zeroed, so we remove several memsets of the result of memblock_virt_alloc(). Finally we convert a few uses of __alloc_bootmem(x, y, MAX_DMA_ADDRESS) to just plain memblock_virt_alloc(). We don't use memblock_alloc_base() because MAX_DMA_ADDRESS is ~0ul on powerpc, so limiting the allocation to that is pointless, 16XB ought to be enough for anyone. Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/mm/mmu_context_nohash.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index af56de82375d..8c9b8115867c 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -315,7 +315,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate) * If gpages can be in highmem we can't use the trick of storing the * data structure in the page; allocate space for this */ - m = alloc_bootmem(sizeof(struct huge_bootmem_page)); + m = memblock_virt_alloc(sizeof(struct huge_bootmem_page), 0); m->phys = gpage_freearray[idx].gpage_list[--nr_gpages]; #else m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]); diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 928ebe79668b..9cba6cba2e50 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -421,12 +421,12 @@ void __init mmu_context_init(void) /* * Allocate the maps used by context management */ - context_map = alloc_bootmem(CTX_MAP_SIZE); - context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1)); + context_map = memblock_virt_alloc(CTX_MAP_SIZE, 0); + context_mm = memblock_virt_alloc(sizeof(void *) * (last_context + 1), 0); #ifndef CONFIG_SMP - stale_map[0] = alloc_bootmem(CTX_MAP_SIZE); + stale_map[0] = memblock_virt_alloc(CTX_MAP_SIZE, 0); #else - stale_map[boot_cpuid] = alloc_bootmem(CTX_MAP_SIZE); + stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0); register_cpu_notifier(&mmu_context_cpu_nb); #endif -- cgit From 0ec2698fe1c451606c16e21dbfbd29efce694668 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 3 Nov 2014 20:21:34 +0530 Subject: powerpc/mm: Check for matching hpte without taking hpte lock With smaller hash page table config, we would end up in situation where we would be replacing hash page table slot frequently. In such config, we will find the hpte to be not matching, and we can do that check without holding the hpte lock. We need to recheck the hpte again after holding lock. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hash_native_64.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index d53288a08c37..558e50bac6f7 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -294,8 +294,6 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", vpn, want_v & HPTE_V_AVPN, slot, newpp); - native_lock_hpte(hptep); - hpte_v = be64_to_cpu(hptep->v); /* * We need to invalidate the TLB always because hpte_remove doesn't do @@ -308,16 +306,24 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" -> miss\n"); ret = -1; } else { - DBG_LOW(" -> hit\n"); - /* Update the HPTE */ - hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C))); + native_lock_hpte(hptep); + /* recheck with locks held */ + hpte_v = be64_to_cpu(hptep->v); + if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) || + !(hpte_v & HPTE_V_VALID))) { + ret = -1; + } else { + DBG_LOW(" -> hit\n"); + /* Update the HPTE */ + hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & + ~(HPTE_R_PP | HPTE_R_N)) | + (newpp & (HPTE_R_PP | HPTE_R_N | + HPTE_R_C))); + } + native_unlock_hpte(hptep); } - native_unlock_hpte(hptep); - /* Ensure it is out of the tlb too. */ tlbie(vpn, bpsize, apsize, ssize, local); - return ret; } -- cgit From c4f3eb5fc527b749d6fb0d47ffdcfe83706dbe2f Mon Sep 17 00:00:00 2001 From: James Yang Date: Fri, 14 Nov 2014 12:32:24 -0600 Subject: powerpc/mm/hugetlb: Sanity check gigantic hugepage count Limit the number of gigantic hugepages specified by the hugepages= parameter to MAX_NUMBER_GPAGES. Signed-off-by: James Yang Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hugetlbpage.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index af56de82375d..747e0c616526 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -355,6 +355,13 @@ static int __init do_gpage_early_setup(char *param, char *val, if (size != 0) { if (sscanf(val, "%lu", &npages) <= 0) npages = 0; + if (npages > MAX_NUMBER_GPAGES) { + pr_warn("MMU: %lu pages requested for page " + "size %llu KB, limiting to " + __stringify(MAX_NUMBER_GPAGES) "\n", + npages, size / 1024); + npages = MAX_NUMBER_GPAGES; + } gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages; size = 0; } -- cgit From f1581bf14bc40b4a14bf10358eac0b22173b3313 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 2 Nov 2014 21:15:27 +0530 Subject: powerpc/mm/thp: Remove code duplication Rename invalidate_old_hpte to flush_hash_hugepage and use that in other places. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hash_utils_64.c | 52 +++++++++++++++++++++++++++++++ arch/powerpc/mm/hugepage-hash64.c | 54 ++------------------------------- arch/powerpc/mm/pgtable_64.c | 64 ++++++--------------------------------- 3 files changed, 63 insertions(+), 107 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index f01027731e23..6c2076c65d7c 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1315,6 +1315,58 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, #endif } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void flush_hash_hugepage(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize) +{ + int i, max_hpte_count, valid; + unsigned long s_addr; + unsigned char *hpte_slot_array; + unsigned long hidx, shift, vpn, hash, slot; + + s_addr = addr & HPAGE_PMD_MASK; + hpte_slot_array = get_hpte_slot_array(pmdp); + /* + * IF we try to do a HUGE PTE update after a withdraw is done. + * we will find the below NULL. This happens when we do + * split_huge_page_pmd + */ + if (!hpte_slot_array) + return; + + if (ppc_md.hugepage_invalidate) + return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize); + /* + * No bluk hpte removal support, invalidate each entry + */ + shift = mmu_psize_defs[psize].shift; + max_hpte_count = HPAGE_PMD_SIZE >> shift; + for (i = 0; i < max_hpte_count; i++) { + /* + * 8 bits per each hpte entries + * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] + */ + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + ppc_md.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, 0); + } +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + void flush_hash_range(unsigned long number, int local) { if (ppc_md.flush_hash_range) diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 5f5e6328c21c..1b3ad46a71b5 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -18,57 +18,6 @@ #include #include -static void invalidate_old_hpte(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize) -{ - int i, max_hpte_count, valid; - unsigned long s_addr; - unsigned char *hpte_slot_array; - unsigned long hidx, shift, vpn, hash, slot; - - s_addr = addr & HPAGE_PMD_MASK; - hpte_slot_array = get_hpte_slot_array(pmdp); - /* - * IF we try to do a HUGE PTE update after a withdraw is done. - * we will find the below NULL. This happens when we do - * split_huge_page_pmd - */ - if (!hpte_slot_array) - return; - - if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, - psize, ssize); - /* - * No bluk hpte removal support, invalidate each entry - */ - shift = mmu_psize_defs[psize].shift; - max_hpte_count = HPAGE_PMD_SIZE >> shift; - for (i = 0; i < max_hpte_count; i++) { - /* - * 8 bits per each hpte entries - * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] - */ - valid = hpte_valid(hpte_slot_array, i); - if (!valid) - continue; - hidx = hpte_hash_index(hpte_slot_array, i); - - /* get the vpn */ - addr = s_addr + (i * (1ul << shift)); - vpn = hpt_vpn(addr, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, 0); - } -} - - int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, unsigned long trap, int local, int ssize, unsigned int psize) @@ -145,7 +94,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * hash page table entries. */ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) - invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize); + flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, + ssize); } valid = hpte_valid(hpte_slot_array, index); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 87ff0c1908a9..c175c990580e 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -739,29 +739,13 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd) { - int ssize, i; - unsigned long s_addr; - int max_hpte_count; - unsigned int psize, valid; - unsigned char *hpte_slot_array; - unsigned long hidx, vpn, vsid, hash, shift, slot; - - /* - * Flush all the hptes mapping this hugepage - */ - s_addr = addr & HPAGE_PMD_MASK; - hpte_slot_array = get_hpte_slot_array(pmdp); - /* - * IF we try to do a HUGE PTE update after a withdraw is done. - * we will find the below NULL. This happens when we do - * split_huge_page_pmd - */ - if (!hpte_slot_array) - return; + int ssize; + unsigned int psize; + unsigned long vsid; /* get the base page size,vsid and segment size */ #ifdef CONFIG_DEBUG_VM - psize = get_slice_psize(mm, s_addr); + psize = get_slice_psize(mm, addr); BUG_ON(psize == MMU_PAGE_16M); #endif if (old_pmd & _PAGE_COMBO) @@ -769,46 +753,16 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, else psize = MMU_PAGE_64K; - if (!is_kernel_addr(s_addr)) { - ssize = user_segment_size(s_addr); - vsid = get_vsid(mm->context.id, s_addr, ssize); + if (!is_kernel_addr(addr)) { + ssize = user_segment_size(addr); + vsid = get_vsid(mm->context.id, addr, ssize); WARN_ON(vsid == 0); } else { - vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize); + vsid = get_kernel_vsid(addr, mmu_kernel_ssize); ssize = mmu_kernel_ssize; } - if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(vsid, s_addr, - hpte_slot_array, - psize, ssize); - /* - * No bluk hpte removal support, invalidate each entry - */ - shift = mmu_psize_defs[psize].shift; - max_hpte_count = HPAGE_PMD_SIZE >> shift; - for (i = 0; i < max_hpte_count; i++) { - /* - * 8 bits per each hpte entries - * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] - */ - valid = hpte_valid(hpte_slot_array, i); - if (!valid) - continue; - hidx = hpte_hash_index(hpte_slot_array, i); - - /* get the vpn */ - addr = s_addr + (i * (1ul << shift)); - vpn = hpt_vpn(addr, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, 0); - } + return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) -- cgit From d557b09800dab5dd6804e5b79324069abcf0be11 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 2 Nov 2014 21:15:28 +0530 Subject: powerpc/mm/thp: Use tlbiel if possible If we know that user address space has never executed on other cpus we could use tlbiel. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hash_native_64.c | 4 ++-- arch/powerpc/mm/hash_utils_64.c | 28 +++++++++++++++++++++++----- arch/powerpc/mm/hugepage-hash64.c | 2 +- arch/powerpc/mm/pgtable_64.c | 9 +++++++-- 4 files changed, 33 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 558e50bac6f7..13700911b522 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -425,7 +425,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, static void native_hugepage_invalidate(unsigned long vsid, unsigned long addr, unsigned char *hpte_slot_array, - int psize, int ssize) + int psize, int ssize, int local) { int i; struct hash_pte *hptep; @@ -471,7 +471,7 @@ static void native_hugepage_invalidate(unsigned long vsid, * instruction compares entry_VA in tlb with the VA specified * here */ - tlbie(vpn, psize, actual_psize, ssize, 0); + tlbie(vpn, psize, actual_psize, ssize, local); } local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 6c2076c65d7c..68211d398fdb 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1317,7 +1317,7 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_hash_hugepage(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize) + pmd_t *pmdp, unsigned int psize, int ssize, int local) { int i, max_hpte_count, valid; unsigned long s_addr; @@ -1334,9 +1334,11 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, if (!hpte_slot_array) return; - if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, - psize, ssize); + if (ppc_md.hugepage_invalidate) { + ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize, local); + goto tm_abort; + } /* * No bluk hpte removal support, invalidate each entry */ @@ -1362,8 +1364,24 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, 0); + MMU_PAGE_16M, ssize, local); + } +tm_abort: +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Transactions are not aborted by tlbiel, only tlbie. + * Without, syncing a page back to a block device w/ PIO could pick up + * transactional data (bad!) so we force an abort here. Before the + * sync the page will be made read-only, which will flush_hash_page. + * BIG ISSUE here: if the kernel uses a page from userspace without + * unmapping it first, it may see the speculated version. + */ + if (local && cpu_has_feature(CPU_FTR_TM) && + current->thread.regs && + MSR_TM_ACTIVE(current->thread.regs->msr)) { + tm_enable(); + tm_abort(TM_CAUSE_TLBI); } +#endif } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 1b3ad46a71b5..3a648cd363ae 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -95,7 +95,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, */ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, - ssize); + ssize, local); } valid = hpte_valid(hpte_slot_array, index); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c175c990580e..eea9fa1f8ae7 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -739,9 +739,10 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd) { - int ssize; + int ssize, local = 0; unsigned int psize; unsigned long vsid; + const struct cpumask *tmp; /* get the base page size,vsid and segment size */ #ifdef CONFIG_DEBUG_VM @@ -762,7 +763,11 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, ssize = mmu_kernel_ssize; } - return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize); + tmp = cpumask_of(smp_processor_id()); + if (cpumask_equal(mm_cpumask(mm), tmp)) + local = 1; + + return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, local); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) -- cgit From aefa5688c070727b8729de1aef85cad7b9933fc7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 4 Dec 2014 11:00:14 +0530 Subject: powerpc/mm: don't do tlbie for updatepp request with NO HPTE fault upatepp can get called for a nohpte fault when we find from the linux page table that the translation was hashed before. In that case we are sure that there is no existing translation, hence we could avoid doing tlbie. We could possibly race with a parallel fault filling the TLB. But that should be ok because updatepp is only ever relaxing permissions. We also look at linux pte permission bits when filling hash pte permission bits. We also hold the linux pte busy bits while inserting/updating a hashpte entry, hence a paralle update of linux pte is not possible. On the other hand mprotect involves ptep_modify_prot_start which cause a hpte invalidate and not updatepp. Performance number: We use randbox_access_bench written by Anton. Kernel with THP disabled and smaller hash page table size. 86.60% random_access_b [kernel.kallsyms] [k] .native_hpte_updatepp 2.10% random_access_b random_access_bench [.] doit 1.99% random_access_b [kernel.kallsyms] [k] .do_raw_spin_lock 1.85% random_access_b [kernel.kallsyms] [k] .native_hpte_insert 1.26% random_access_b [kernel.kallsyms] [k] .native_flush_hash_range 1.18% random_access_b [kernel.kallsyms] [k] .__delay 0.69% random_access_b [kernel.kallsyms] [k] .native_hpte_remove 0.37% random_access_b [kernel.kallsyms] [k] .clear_user_page 0.34% random_access_b [kernel.kallsyms] [k] .__hash_page_64K 0.32% random_access_b [kernel.kallsyms] [k] fast_exception_return 0.30% random_access_b [kernel.kallsyms] [k] .hash_page_mm With Fix: 27.54% random_access_b random_access_bench [.] doit 22.90% random_access_b [kernel.kallsyms] [k] .native_hpte_insert 5.76% random_access_b [kernel.kallsyms] [k] .native_hpte_remove 5.20% random_access_b [kernel.kallsyms] [k] fast_exception_return 5.12% random_access_b [kernel.kallsyms] [k] .__hash_page_64K 4.80% random_access_b [kernel.kallsyms] [k] .hash_page_mm 3.31% random_access_b [kernel.kallsyms] [k] data_access_common 1.84% random_access_b [kernel.kallsyms] [k] .trace_hardirqs_on_caller Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_low_64.S | 15 ++++++------ arch/powerpc/mm/hash_native_64.c | 15 ++++++++---- arch/powerpc/mm/hash_utils_64.c | 44 +++++++++++++++++++++++------------- arch/powerpc/mm/hugepage-hash64.c | 8 +++---- arch/powerpc/mm/hugetlbpage-hash64.c | 6 ++--- arch/powerpc/mm/pgtable_64.c | 7 +++--- 6 files changed, 58 insertions(+), 37 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 5094f32b706e..463174a4a647 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -46,7 +46,8 @@ /* * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local, int ssize) + * pte_t *ptep, unsigned long trap, unsigned long flags, + * int ssize) * * Adds a 4K page to the hash table in a segment of 4K pages only */ @@ -298,7 +299,7 @@ htab_modify_pte: li r6,MMU_PAGE_4K /* base page size */ li r7,MMU_PAGE_4K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl htab_call_hpte_updatepp htab_call_hpte_updatepp: bl . /* Patched by htab_finish_init() */ @@ -338,8 +339,8 @@ htab_pte_insert_failure: *****************************************************************************/ /* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local, int ssize, - * int subpg_prot) + * pte_t *ptep, unsigned long trap, unsigned local flags, + * int ssize, int subpg_prot) */ /* @@ -594,7 +595,7 @@ htab_inval_old_hpte: li r5,0 /* PTE.hidx */ li r6,MMU_PAGE_64K /* psize */ ld r7,STK_PARAM(R9)(r1) /* ssize */ - ld r8,STK_PARAM(R8)(r1) /* local */ + ld r8,STK_PARAM(R8)(r1) /* flags */ bl flush_hash_page /* Clear out _PAGE_HPTE_SUB bits in the new linux PTE */ lis r0,_PAGE_HPTE_SUB@h @@ -666,7 +667,7 @@ htab_modify_pte: li r6,MMU_PAGE_4K /* base page size */ li r7,MMU_PAGE_4K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl htab_call_hpte_updatepp htab_call_hpte_updatepp: bl . /* patched by htab_finish_init() */ @@ -962,7 +963,7 @@ ht64_modify_pte: li r6,MMU_PAGE_64K /* base page size */ li r7,MMU_PAGE_64K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl ht64_call_hpte_updatepp ht64_call_hpte_updatepp: bl . /* patched by htab_finish_init() */ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 13700911b522..9c4880ddecd6 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -283,11 +283,11 @@ static long native_hpte_remove(unsigned long hpte_group) static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int bpsize, - int apsize, int ssize, int local) + int apsize, int ssize, unsigned long flags) { struct hash_pte *hptep = htab_address + slot; unsigned long hpte_v, want_v; - int ret = 0; + int ret = 0, local = 0; want_v = hpte_encode_avpn(vpn, bpsize, ssize); @@ -322,8 +322,15 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, } native_unlock_hpte(hptep); } - /* Ensure it is out of the tlb too. */ - tlbie(vpn, bpsize, apsize, ssize, local); + + if (flags & HPTE_LOCAL_UPDATE) + local = 1; + /* + * Ensure it is out of the tlb too if it is not a nohpte fault + */ + if (!(flags & HPTE_NOHPTE_UPDATE)) + tlbie(vpn, bpsize, apsize, ssize, local); + return ret; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 68211d398fdb..e56a307bc676 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -989,7 +989,9 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, * -1 - critical hash insertion error * -2 - access not permitted by subpage protection mechanism */ -int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) +int hash_page_mm(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap, + unsigned long flags) { enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; @@ -997,7 +999,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u pte_t *ptep; unsigned hugeshift; const struct cpumask *tmp; - int rc, user_region = 0, local = 0; + int rc, user_region = 0; int psize, ssize; DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", @@ -1049,7 +1051,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u /* Check CPU locality */ tmp = cpumask_of(smp_processor_id()); if (user_region && cpumask_equal(mm_cpumask(mm), tmp)) - local = 1; + flags |= HPTE_LOCAL_UPDATE; #ifndef CONFIG_PPC_64K_PAGES /* If we use 4K pages and our psize is not 4K, then we might @@ -1086,11 +1088,11 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u if (hugeshift) { if (pmd_trans_huge(*(pmd_t *)ptep)) rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep, - trap, local, ssize, psize); + trap, flags, ssize, psize); #ifdef CONFIG_HUGETLB_PAGE else rc = __hash_page_huge(ea, access, vsid, ptep, trap, - local, ssize, hugeshift, psize); + flags, ssize, hugeshift, psize); #else else { /* @@ -1149,7 +1151,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u #ifdef CONFIG_PPC_HAS_HASH_64K if (psize == MMU_PAGE_64K) - rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, + flags, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ { @@ -1158,7 +1161,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u rc = -2; else rc = __hash_page_4K(ea, access, vsid, ptep, trap, - local, ssize, spp); + flags, ssize, spp); } /* Dump some info in case of hash insertion failure, they should @@ -1181,14 +1184,19 @@ bail: } EXPORT_SYMBOL_GPL(hash_page_mm); -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +int hash_page(unsigned long ea, unsigned long access, unsigned long trap, + unsigned long dsisr) { + unsigned long flags = 0; struct mm_struct *mm = current->mm; if (REGION_ID(ea) == VMALLOC_REGION_ID) mm = &init_mm; - return hash_page_mm(mm, ea, access, trap); + if (dsisr & DSISR_NOHPTE) + flags |= HPTE_NOHPTE_UPDATE; + + return hash_page_mm(mm, ea, access, trap, flags); } EXPORT_SYMBOL_GPL(hash_page); @@ -1200,7 +1208,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, pgd_t *pgdir; pte_t *ptep; unsigned long flags; - int rc, ssize, local = 0; + int rc, ssize, update_flags = 0; BUG_ON(REGION_ID(ea) != USER_REGION_ID); @@ -1251,16 +1259,17 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Is that local to this CPU ? */ if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - local = 1; + update_flags |= HPTE_LOCAL_UPDATE; /* Hash it in */ #ifdef CONFIG_PPC_HAS_HASH_64K if (mm->context.user_psize == MMU_PAGE_64K) - rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, + update_flags, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ - rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, - subpage_protection(mm, ea)); + rc = __hash_page_4K(ea, access, vsid, ptep, trap, update_flags, + ssize, subpage_protection(mm, ea)); /* Dump some info in case of hash insertion failure, they should * never happen so it is really useful to know if/when they do @@ -1278,9 +1287,10 @@ out_exit: * do not forget to update the assembly call site ! */ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, - int local) + unsigned long flags) { unsigned long hash, index, shift, hidx, slot; + int local = flags & HPTE_LOCAL_UPDATE; DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn); pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { @@ -1317,12 +1327,14 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_hash_hugepage(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize, int local) + pmd_t *pmdp, unsigned int psize, int ssize, + unsigned long flags) { int i, max_hpte_count, valid; unsigned long s_addr; unsigned char *hpte_slot_array; unsigned long hidx, shift, vpn, hash, slot; + int local = flags & HPTE_LOCAL_UPDATE; s_addr = addr & HPAGE_PMD_MASK; hpte_slot_array = get_hpte_slot_array(pmdp); diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 3a648cd363ae..86686514ae13 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -19,8 +19,8 @@ #include int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, - pmd_t *pmdp, unsigned long trap, int local, int ssize, - unsigned int psize) + pmd_t *pmdp, unsigned long trap, unsigned long flags, + int ssize, unsigned int psize) { unsigned int index, valid; unsigned char *hpte_slot_array; @@ -95,7 +95,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, */ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, - ssize, local); + ssize, flags); } valid = hpte_valid(hpte_slot_array, index); @@ -108,7 +108,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, slot += hidx & _PTEIDX_GROUP_IX; ret = ppc_md.hpte_updatepp(slot, rflags, vpn, - psize, lpsize, ssize, local); + psize, lpsize, ssize, flags); /* * We failed to update, try to insert a new entry. */ diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index a5bcf9301196..d94b1af53a93 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -19,8 +19,8 @@ extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long vflags, int psize, int ssize); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, - pte_t *ptep, unsigned long trap, int local, int ssize, - unsigned int shift, unsigned int mmu_psize) + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, unsigned int shift, unsigned int mmu_psize) { unsigned long vpn; unsigned long old_pte, new_pte; @@ -81,7 +81,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, slot += (old_pte & _PAGE_F_GIX) >> 12; if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, - mmu_psize, ssize, local) == -1) + mmu_psize, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index eea9fa1f8ae7..4fe5f64cc179 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -739,9 +739,10 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd) { - int ssize, local = 0; + int ssize; unsigned int psize; unsigned long vsid; + unsigned long flags = 0; const struct cpumask *tmp; /* get the base page size,vsid and segment size */ @@ -765,9 +766,9 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, tmp = cpumask_of(smp_processor_id()); if (cpumask_equal(mm_cpumask(mm), tmp)) - local = 1; + flags |= HPTE_LOCAL_UPDATE; - return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, local); + return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) -- cgit