diff options
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/book3s32/mmu.c | 11 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/hash_native.c | 38 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/hash_utils.c | 19 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/pkeys.c | 10 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/radix_pgtable.c | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/radix_tlb.c | 80 | ||||
-rw-r--r-- | arch/powerpc/mm/dma-noncoherent.c | 8 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 6 | ||||
-rw-r--r-- | arch/powerpc/mm/init-common.c | 7 | ||||
-rw-r--r-- | arch/powerpc/mm/init_32.c | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/init_64.c | 59 | ||||
-rw-r--r-- | arch/powerpc/mm/ioremap_32.c | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/ioremap_64.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 203 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_decl.h | 11 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/8xx.c | 52 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/fsl_booke.c | 8 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/kaslr_booke.c | 401 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_32.c | 5 |
20 files changed, 784 insertions, 144 deletions
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 84d5fab94f8f..69b2419accef 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -251,9 +251,18 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, { unsigned int bl; int wimgxpp; - struct ppc_bat *bat = BATS[index]; + struct ppc_bat *bat; unsigned long flags = pgprot_val(prot); + if (index == -1) + index = find_free_bat(); + if (index == -1) { + pr_err("%s: no BAT available for mapping 0x%llx\n", __func__, + (unsigned long long)phys); + return; + } + bat = BATS[index]; + if ((flags & _PAGE_NO_CACHE) || (cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0)) flags &= ~_PAGE_COHERENT; diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index 523e42eb11da..d2d8237ea9d5 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -482,19 +482,12 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, return ret; } -static long native_hpte_find(unsigned long vpn, int psize, int ssize) +static long __native_hpte_find(unsigned long want_v, unsigned long slot) { struct hash_pte *hptep; - unsigned long hash; + unsigned long hpte_v; unsigned long i; - long slot; - unsigned long want_v, hpte_v; - hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); - want_v = hpte_encode_avpn(vpn, psize, ssize); - - /* Bolted mappings are only ever in the primary group */ - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; for (i = 0; i < HPTES_PER_GROUP; i++) { hptep = htab_address + slot; @@ -508,6 +501,33 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) return -1; } +static long native_hpte_find(unsigned long vpn, int psize, int ssize) +{ + unsigned long hpte_group; + unsigned long want_v; + unsigned long hash; + long slot; + + hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); + + /* + * We try to keep bolted entries always in primary hash + * But in some case we can find them in secondary too. + */ + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot = __native_hpte_find(want_v, hpte_group); + if (slot < 0) { + /* Try in secondary */ + hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP; + slot = __native_hpte_find(want_v, hpte_group); + if (slot < 0) + return -1; + } + + return slot; +} + /* * Update the page protection bits. Intended to be used to create * guard pages for kernel data structures on pages which are bolted diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 6c123760164e..b30435c7d804 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -263,6 +263,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long vsid = get_kernel_vsid(vaddr, ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, ssize); unsigned long tprot = prot; + bool secondary_hash = false; /* * If we hit a bad address return error. @@ -291,13 +292,31 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); BUG_ON(!mmu_hash_ops.hpte_insert); +repeat: ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, HPTE_V_BOLTED, psize, psize, ssize); + if (ret == -1) { + /* + * Try to to keep bolted entries in primary. + * Remove non bolted entries and try insert again + */ + ret = mmu_hash_ops.hpte_remove(hpteg); + if (ret != -1) + ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, + HPTE_V_BOLTED, psize, psize, + ssize); + if (ret == -1 && !secondary_hash) { + secondary_hash = true; + hpteg = ((~hash & htab_hash_mask) * HPTES_PER_GROUP); + goto repeat; + } + } if (ret < 0) break; + cond_resched(); #ifdef CONFIG_DEBUG_PAGEALLOC if (debug_pagealloc_enabled() && (paddr >> PAGE_SHIFT) < linear_map_hash_count) diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index ae7fca40e5b3..59e0ebbd8036 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -307,16 +307,6 @@ void thread_pkey_regs_init(struct thread_struct *thread) write_iamr(pkey_iamr_mask); } -static inline bool pkey_allows_readwrite(int pkey) -{ - int pkey_shift = pkeyshift(pkey); - - if (!is_pkey_enabled(pkey)) - return true; - - return !(read_amr() & ((AMR_RD_BIT|AMR_WR_BIT) << pkey_shift)); -} - int __execute_only_pkey(struct mm_struct *mm) { return mm->context.execute_only_pkey; diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 6ee17d09649c..974109bb85db 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -13,6 +13,7 @@ #include <linux/memblock.h> #include <linux/of_fdt.h> #include <linux/mm.h> +#include <linux/hugetlb.h> #include <linux/string_helpers.h> #include <linux/stop_machine.h> diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 67af871190c6..a95175c0972b 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -732,18 +732,13 @@ local: } preempt_enable(); } + void radix__flush_all_mm(struct mm_struct *mm) { __flush_all_mm(mm, false); } EXPORT_SYMBOL(radix__flush_all_mm); -void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) -{ - tlb->need_flush_all = 1; -} -EXPORT_SYMBOL(radix__flush_tlb_pwc); - void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize) { @@ -832,8 +827,7 @@ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; static inline void __radix__flush_tlb_range(struct mm_struct *mm, - unsigned long start, unsigned long end, - bool flush_all_sizes) + unsigned long start, unsigned long end) { unsigned long pid; @@ -879,26 +873,16 @@ is_local: } } } else { - bool hflush = flush_all_sizes; - bool gflush = flush_all_sizes; + bool hflush = false; unsigned long hstart, hend; - unsigned long gstart, gend; - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) - hflush = true; - - if (hflush) { + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { hstart = (start + PMD_SIZE - 1) & PMD_MASK; hend = end & PMD_MASK; if (hstart == hend) hflush = false; - } - - if (gflush) { - gstart = (start + PUD_SIZE - 1) & PUD_MASK; - gend = end & PUD_MASK; - if (gstart == gend) - gflush = false; + else + hflush = true; } if (local) { @@ -907,9 +891,6 @@ is_local: if (hflush) __tlbiel_va_range(hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M); - if (gflush) - __tlbiel_va_range(gstart, gend, pid, - PUD_SIZE, MMU_PAGE_1G); asm volatile("ptesync": : :"memory"); } else if (cputlb_use_tlbie()) { asm volatile("ptesync": : :"memory"); @@ -917,10 +898,6 @@ is_local: if (hflush) __tlbie_va_range(hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M); - if (gflush) - __tlbie_va_range(gstart, gend, pid, - PUD_SIZE, MMU_PAGE_1G); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); } else { _tlbiel_va_range_multicast(mm, @@ -928,9 +905,6 @@ is_local: if (hflush) _tlbiel_va_range_multicast(mm, hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false); - if (gflush) - _tlbiel_va_range_multicast(mm, - gstart, gend, pid, PUD_SIZE, MMU_PAGE_1G, false); } } preempt_enable(); @@ -945,7 +919,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, return radix__flush_hugetlb_tlb_range(vma, start, end); #endif - __radix__flush_tlb_range(vma->vm_mm, start, end, false); + __radix__flush_tlb_range(vma->vm_mm, start, end); } EXPORT_SYMBOL(radix__flush_tlb_range); @@ -1021,53 +995,19 @@ void radix__tlb_flush(struct mmu_gather *tlb) * that flushes the process table entry cache upon process teardown. * See the comment for radix in arch_exit_mmap(). */ - if (tlb->fullmm) { + if (tlb->fullmm || tlb->need_flush_all) { __flush_all_mm(mm, true); -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) - } else if (mm_tlb_flush_nested(mm)) { - /* - * If there is a concurrent invalidation that is clearing ptes, - * then it's possible this invalidation will miss one of those - * cleared ptes and miss flushing the TLB. If this invalidate - * returns before the other one flushes TLBs, that can result - * in it returning while there are still valid TLBs inside the - * range to be invalidated. - * - * See mm/memory.c:tlb_finish_mmu() for more details. - * - * The solution to this is ensure the entire range is always - * flushed here. The problem for powerpc is that the flushes - * are page size specific, so this "forced flush" would not - * do the right thing if there are a mix of page sizes in - * the range to be invalidated. So use __flush_tlb_range - * which invalidates all possible page sizes in the range. - * - * PWC flush probably is not be required because the core code - * shouldn't free page tables in this path, but accounting - * for the possibility makes us a bit more robust. - * - * need_flush_all is an uncommon case because page table - * teardown should be done with exclusive locks held (but - * after locks are dropped another invalidate could come - * in), it could be optimized further if necessary. - */ - if (!tlb->need_flush_all) - __radix__flush_tlb_range(mm, start, end, true); - else - radix__flush_all_mm(mm); -#endif } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { - if (!tlb->need_flush_all) + if (!tlb->freed_tables) radix__flush_tlb_mm(mm); else radix__flush_all_mm(mm); } else { - if (!tlb->need_flush_all) + if (!tlb->freed_tables) radix__flush_tlb_range_psize(mm, start, end, psize); else radix__flush_tlb_pwc_range_psize(mm, start, end, psize); } - tlb->need_flush_all = 0; } static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index 2a82984356f8..5ab4f868e919 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -104,14 +104,14 @@ static void __dma_sync_page(phys_addr_t paddr, size_t size, int dir) #endif } -void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir) +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { __dma_sync_page(paddr, size, dir); } -void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir) +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { __dma_sync_page(paddr, size, dir); } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 8432c281de92..b5047f9b5dec 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -645,6 +645,7 @@ NOKPROBE_SYMBOL(do_page_fault); void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) { const struct exception_table_entry *entry; + int is_write = page_fault_is_write(regs->dsisr); /* Are we prepared to handle this fault? */ if ((entry = search_exception_tables(regs->nip)) != NULL) { @@ -658,9 +659,10 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) case 0x300: case 0x380: case 0xe00: - pr_alert("BUG: %s at 0x%08lx\n", + pr_alert("BUG: %s on %s at 0x%08lx\n", regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : - "Unable to handle kernel data access", regs->dar); + "Unable to handle kernel data access", + is_write ? "write" : "read", regs->dar); break; case 0x400: case 0x480: diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index a84da92920f7..42ef7a6e6098 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -21,6 +21,13 @@ #include <asm/pgtable.h> #include <asm/kup.h> +phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull; +EXPORT_SYMBOL_GPL(memstart_addr); +phys_addr_t kernstart_addr __ro_after_init; +EXPORT_SYMBOL_GPL(kernstart_addr); +unsigned long kernstart_virt_addr __ro_after_init = KERNELBASE; +EXPORT_SYMBOL_GPL(kernstart_virt_addr); + static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP); static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index b04896a88d79..872df48ae41b 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -56,11 +56,6 @@ phys_addr_t total_memory; phys_addr_t total_lowmem; -phys_addr_t memstart_addr = (phys_addr_t)~0ull; -EXPORT_SYMBOL(memstart_addr); -phys_addr_t kernstart_addr; -EXPORT_SYMBOL(kernstart_addr); - #ifdef CONFIG_RELOCATABLE /* Used in __va()/__pa() */ long long virt_phys_offset; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 4e08246acd79..4002ced3596f 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -63,38 +63,48 @@ #include <mm/mmu_decl.h> -phys_addr_t memstart_addr = ~0; -EXPORT_SYMBOL_GPL(memstart_addr); -phys_addr_t kernstart_addr; -EXPORT_SYMBOL_GPL(kernstart_addr); - #ifdef CONFIG_SPARSEMEM_VMEMMAP /* - * Given an address within the vmemmap, determine the pfn of the page that - * represents the start of the section it is within. Note that we have to + * Given an address within the vmemmap, determine the page that + * represents the start of the subsection it is within. Note that we have to * do this by hand as the proffered address may not be correctly aligned. * Subtraction of non-aligned pointers produces undefined results. */ -static unsigned long __meminit vmemmap_section_start(unsigned long page) +static struct page * __meminit vmemmap_subsection_start(unsigned long vmemmap_addr) { - unsigned long offset = page - ((unsigned long)(vmemmap)); + unsigned long start_pfn; + unsigned long offset = vmemmap_addr - ((unsigned long)(vmemmap)); /* Return the pfn of the start of the section. */ - return (offset / sizeof(struct page)) & PAGE_SECTION_MASK; + start_pfn = (offset / sizeof(struct page)) & PAGE_SUBSECTION_MASK; + return pfn_to_page(start_pfn); } /* - * Check if this vmemmap page is already initialised. If any section - * which overlaps this vmemmap page is initialised then this page is - * initialised already. + * Since memory is added in sub-section chunks, before creating a new vmemmap + * mapping, the kernel should check whether there is an existing memmap mapping + * covering the new subsection added. This is needed because kernel can map + * vmemmap area using 16MB pages which will cover a memory range of 16G. Such + * a range covers multiple subsections (2M) + * + * If any subsection in the 16G range mapped by vmemmap is valid we consider the + * vmemmap populated (There is a page table entry already present). We can't do + * a page table lookup here because with the hash translation we don't keep + * vmemmap details in linux page table. */ -static int __meminit vmemmap_populated(unsigned long start, int page_size) +static int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size) { - unsigned long end = start + page_size; - start = (unsigned long)(pfn_to_page(vmemmap_section_start(start))); + struct page *start; + unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size; + start = vmemmap_subsection_start(vmemmap_addr); - for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page))) - if (pfn_valid(page_to_pfn((struct page *)start))) + for (; (unsigned long)start < vmemmap_end; start += PAGES_PER_SUBSECTION) + /* + * pfn valid check here is intended to really check + * whether we have any subsection already initialized + * in this range. + */ + if (pfn_valid(page_to_pfn(start))) return 1; return 0; @@ -201,6 +211,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, void *p = NULL; int rc; + /* + * This vmemmap range is backing different subsections. If any + * of that subsection is marked valid, that means we already + * have initialized a page table covering this range and hence + * the vmemmap range is populated. + */ if (vmemmap_populated(start, page_size)) continue; @@ -290,9 +306,10 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, struct page *page; /* - * the section has already be marked as invalid, so - * vmemmap_populated() true means some other sections still - * in this page, so skip it. + * We have already marked the subsection we are trying to remove + * invalid. So if we want to remove the vmemmap range, we + * need to make sure there is no subsection marked valid + * in this range. */ if (vmemmap_populated(start, page_size)) continue; diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c index f36121f25243..743e11384dea 100644 --- a/arch/powerpc/mm/ioremap_32.c +++ b/arch/powerpc/mm/ioremap_32.c @@ -68,6 +68,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call /* * Should check if it is a candidate for a BAT mapping */ + pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller); err = early_ioremap_range(ioremap_bot - size, p, size, prot); if (err) diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c index fd29e51700cd..50a99d9684f7 100644 --- a/arch/powerpc/mm/ioremap_64.c +++ b/arch/powerpc/mm/ioremap_64.c @@ -81,6 +81,8 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size, if (slab_is_available()) return do_ioremap(paligned, offset, size, prot, caller); + pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller); + err = early_ioremap_range(ioremap_bot, paligned, size, prot); if (err) return NULL; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index be941d382c8d..ad299e72ec30 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -31,6 +31,7 @@ #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/memremap.h> +#include <linux/dma-direct.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -104,6 +105,27 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } +#define FLUSH_CHUNK_SIZE SZ_1G +/** + * flush_dcache_range_chunked(): Write any modified data cache blocks out to + * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE + * Does not invalidate the corresponding instruction cache blocks. + * + * @start: the start address + * @stop: the stop address (exclusive) + * @chunk: the max size of the chunks + */ +static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, + unsigned long chunk) +{ + unsigned long i; + + for (i = start; i < stop; i += chunk) { + flush_dcache_range(i, min(stop, start + chunk)); + cond_resched(); + } +} + int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_restrictions *restrictions) { @@ -120,7 +142,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, start, start + size, rc); return -EFAULT; } - flush_dcache_range(start, start + size); return __add_pages(nid, start_pfn, nr_pages, restrictions); } @@ -137,7 +158,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); - flush_dcache_range(start, start + size); + flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE); + ret = remove_section_mapping(start, start + size); WARN_ON_ONCE(ret); @@ -201,10 +223,10 @@ static int __init mark_nonram_nosave(void) * everything else. GFP_DMA32 page allocations automatically fall back to * ZONE_DMA. * - * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to - * inform the generic DMA mapping code. 32-bit only devices (if not handled - * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get - * otherwise served by ZONE_DMA. + * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the + * generic DMA mapping code. 32-bit only devices (if not handled by an IOMMU + * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by + * ZONE_DMA. */ static unsigned long max_zone_pfns[MAX_NR_ZONES]; @@ -216,15 +238,13 @@ void __init paging_init(void) unsigned long long total_ram = memblock_phys_mem_size(); phys_addr_t top_of_ram = memblock_end_of_DRAM(); -#ifdef CONFIG_PPC32 - unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1); - unsigned long end = __fix_to_virt(FIX_HOLE); +#ifdef CONFIG_HIGHMEM + unsigned long v = __fix_to_virt(FIX_KMAP_END); + unsigned long end = __fix_to_virt(FIX_KMAP_BEGIN); for (; v < end; v += PAGE_SIZE) map_kernel_page(v, 0, __pgprot(0)); /* XXX gross */ -#endif -#ifdef CONFIG_HIGHMEM map_kernel_page(PKMAP_BASE, 0, __pgprot(0)); /* XXX gross */ pkmap_page_table = virt_to_kpte(PKMAP_BASE); @@ -237,9 +257,18 @@ void __init paging_init(void) printk(KERN_DEBUG "Memory hole size: %ldMB\n", (long int)((top_of_ram - total_ram) >> 20)); + /* + * Allow 30-bit DMA for very limited Broadcom wifi chips on many + * powerbooks. + */ + if (IS_ENABLED(CONFIG_PPC32)) + zone_dma_bits = 30; + else + zone_dma_bits = 31; + #ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = min(max_low_pfn, - 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT)); + 1UL << (zone_dma_bits - PAGE_SHIFT)); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM @@ -318,6 +347,120 @@ void free_initmem(void) free_initmem_default(POISON_FREE_INITMEM); } +/** + * flush_coherent_icache() - if a CPU has a coherent icache, flush it + * @addr: The base address to use (can be any valid address, the whole cache will be flushed) + * Return true if the cache was flushed, false otherwise + */ +static inline bool flush_coherent_icache(unsigned long addr) +{ + /* + * For a snooping icache, we still need a dummy icbi to purge all the + * prefetched instructions from the ifetch buffers. We also need a sync + * before the icbi to order the the actual stores to memory that might + * have modified instructions with the icbi. + */ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { + mb(); /* sync */ + icbi((void *)addr); + mb(); /* sync */ + isync(); + return true; + } + + return false; +} + +/** + * invalidate_icache_range() - Flush the icache by issuing icbi across an address range + * @start: the start address + * @stop: the stop address (exclusive) + */ +static void invalidate_icache_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_icache_shift(); + unsigned long bytes = l1_icache_bytes(); + char *addr = (char *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + icbi(addr); + + mb(); /* sync */ + isync(); +} + +/** + * flush_icache_range: Write any modified data cache blocks out to memory + * and invalidate the corresponding blocks in the instruction cache + * + * Generic code will call this after writing memory, before executing from it. + * + * @start: the start address + * @stop: the stop address (exclusive) + */ +void flush_icache_range(unsigned long start, unsigned long stop) +{ + if (flush_coherent_icache(start)) + return; + + clean_dcache_range(start, stop); + + if (IS_ENABLED(CONFIG_44x)) { + /* + * Flash invalidate on 44x because we are passed kmapped + * addresses and this doesn't work for userspace pages due to + * the virtually tagged icache. + */ + iccci((void *)start); + mb(); /* sync */ + isync(); + } else + invalidate_icache_range(start, stop); +} +EXPORT_SYMBOL(flush_icache_range); + +#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) +/** + * flush_dcache_icache_phys() - Flush a page by it's physical address + * @physaddr: the physical address of the page + */ +static void flush_dcache_icache_phys(unsigned long physaddr) +{ + unsigned long bytes = l1_dcache_bytes(); + unsigned long nb = PAGE_SIZE / bytes; + unsigned long addr = physaddr & PAGE_MASK; + unsigned long msr, msr0; + unsigned long loop1 = addr, loop2 = addr; + + msr0 = mfmsr(); + msr = msr0 & ~MSR_DR; + /* + * This must remain as ASM to prevent potential memory accesses + * while the data MMU is disabled + */ + asm volatile( + " mtctr %2;\n" + " mtmsr %3;\n" + " isync;\n" + "0: dcbst 0, %0;\n" + " addi %0, %0, %4;\n" + " bdnz 0b;\n" + " sync;\n" + " mtctr %2;\n" + "1: icbi 0, %1;\n" + " addi %1, %1, %4;\n" + " bdnz 1b;\n" + " sync;\n" + " mtmsr %5;\n" + " isync;\n" + : "+&r" (loop1), "+&r" (loop2) + : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) + : "ctr", "memory"); +} +#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) + /* * This is called when a page has been modified by the kernel. * It just marks the page as not i-cache clean. We do the i-cache @@ -350,12 +493,46 @@ void flush_dcache_icache_page(struct page *page) __flush_dcache_icache(start); kunmap_atomic(start); } else { - __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT); + unsigned long addr = page_to_pfn(page) << PAGE_SHIFT; + + if (flush_coherent_icache(addr)) + return; + flush_dcache_icache_phys(addr); } #endif } EXPORT_SYMBOL(flush_dcache_icache_page); +/** + * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * + * @page: the address of the page to flush + */ +void __flush_dcache_icache(void *p) +{ + unsigned long addr = (unsigned long)p; + + if (flush_coherent_icache(addr)) + return; + + clean_dcache_range(addr, addr + PAGE_SIZE); + + /* + * We don't flush the icache on 44x. Those have a virtual icache and we + * don't have access to the virtual address here (it's not the page + * vaddr but where it's mapped in user space). The flushing of the + * icache on these is handled elsewhere, when a change in the address + * space occurs, before returning to user space. + */ + + if (cpu_has_feature(MMU_FTR_TYPE_44x)) + return; + + invalidate_icache_range(addr, addr + PAGE_SIZE); +} + void clear_user_page(void *page, unsigned long vaddr, struct page *pg) { clear_page(page); diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index c750ac9ec713..8e99649c24fc 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -139,10 +139,21 @@ extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, extern void adjust_total_lowmem(void); extern int switch_to_as1(void); extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu); +void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys); +void reloc_kernel_entry(void *fdt, int addr); +extern int is_second_reloc; #endif extern void loadcam_entry(unsigned int index); extern void loadcam_multi(int first_idx, int num, int tmp_idx); +#ifdef CONFIG_RANDOMIZE_BASE +void kaslr_early_init(void *dt_ptr, phys_addr_t size); +void kaslr_late_init(void); +#else +static inline void kaslr_early_init(void *dt_ptr, phys_addr_t size) {} +static inline void kaslr_late_init(void) {} +#endif + struct tlbcam { u32 MAS0; u32 MAS1; diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 4a06cb342da2..090af2d2d3e4 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -103,6 +103,19 @@ static void mmu_patch_addis(s32 *site, long simm) patch_instruction_site(site, instr); } +void __init mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot) +{ + unsigned long s = offset; + unsigned long v = PAGE_OFFSET + s; + phys_addr_t p = memstart_addr + s; + + for (; s < top; s += PAGE_SIZE) { + map_kernel_page(v, p, prot); + v += PAGE_SIZE; + p += PAGE_SIZE; + } +} + unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long mapped; @@ -115,10 +128,20 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0); } else { + unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); + mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, - _ALIGN(__pa(_einittext), 8 << 20)); + mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, einittext8); + + /* + * Populate page tables to: + * - have them appear in /sys/kernel/debug/kernel_page_tables + * - allow the BDI to find the pages when they are not PINNED + */ + mmu_mapin_ram_chunk(0, einittext8, PAGE_KERNEL_X); + mmu_mapin_ram_chunk(einittext8, mapped, PAGE_KERNEL); + mmu_mapin_immr(); } mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped); @@ -144,18 +167,41 @@ void mmu_mark_initmem_nx(void) if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23) mmu_patch_addis(&patch__itlbmiss_linmem_top8, -((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1))); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) + if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) { + unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); + unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); + unsigned long etext = __pa(_etext); + mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext)); + + /* Update page tables for PTDUMP and BDI */ + mmu_mapin_ram_chunk(0, einittext8, __pgprot(0)); + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { + mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_TEXT); + mmu_mapin_ram_chunk(etext, einittext8, PAGE_KERNEL); + } else { + mmu_mapin_ram_chunk(0, etext8, PAGE_KERNEL_TEXT); + mmu_mapin_ram_chunk(etext8, einittext8, PAGE_KERNEL); + } + } } #ifdef CONFIG_STRICT_KERNEL_RWX void mmu_mark_rodata_ro(void) { + unsigned long sinittext = __pa(_sinittext); + unsigned long etext = __pa(_etext); + if (CONFIG_DATA_SHIFT < 23) mmu_patch_addis(&patch__dtlbmiss_romem_top8, -__pa(((unsigned long)_sinittext) & ~(LARGE_PAGE_SIZE_8M - 1))); mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext)); + + /* Update page tables for PTDUMP and BDI */ + mmu_mapin_ram_chunk(0, sinittext, __pgprot(0)); + mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_ROX); + mmu_mapin_ram_chunk(etext, sinittext, PAGE_KERNEL_RO); } #endif diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile index 33b6f6f29d3f..0424f6ce5bd8 100644 --- a/arch/powerpc/mm/nohash/Makefile +++ b/arch/powerpc/mm/nohash/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_40x) += 40x.o obj-$(CONFIG_44x) += 44x.o obj-$(CONFIG_PPC_8xx) += 8xx.o obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke.o +obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_booke.o ifdef CONFIG_HUGETLB_PAGE obj-$(CONFIG_PPC_FSL_BOOK3E) += book3e_hugetlbpage.o endif diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c index 556e3cd52a35..b4eb06ceb189 100644 --- a/arch/powerpc/mm/nohash/fsl_booke.c +++ b/arch/powerpc/mm/nohash/fsl_booke.c @@ -263,11 +263,13 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, int __initdata is_second_reloc; notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) { - unsigned long base = KERNELBASE; + unsigned long base = kernstart_virt_addr; + phys_addr_t size; kernstart_addr = start; if (is_second_reloc) { virt_phys_offset = PAGE_OFFSET - memstart_addr; + kaslr_late_init(); return; } @@ -291,7 +293,7 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) start &= ~0x3ffffff; base &= ~0x3ffffff; virt_phys_offset = base - start; - early_get_first_memblock_info(__va(dt_ptr), NULL); + early_get_first_memblock_info(__va(dt_ptr), &size); /* * We now get the memstart_addr, then we should check if this * address is the same as what the PAGE_OFFSET map to now. If @@ -316,6 +318,8 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) /* We should never reach here */ panic("Relocation error"); } + + kaslr_early_init(__va(dt_ptr), size); } #endif #endif diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c new file mode 100644 index 000000000000..4a75f2d9bf0e --- /dev/null +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -0,0 +1,401 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// Copyright (C) 2019 Jason Yan <yanaijie@huawei.com> + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/stddef.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/memblock.h> +#include <linux/libfdt.h> +#include <linux/crash_core.h> +#include <asm/pgalloc.h> +#include <asm/prom.h> +#include <asm/kdump.h> +#include <mm/mmu_decl.h> +#include <generated/compile.h> +#include <generated/utsrelease.h> + +struct regions { + unsigned long pa_start; + unsigned long pa_end; + unsigned long kernel_size; + unsigned long dtb_start; + unsigned long dtb_end; + unsigned long initrd_start; + unsigned long initrd_end; + unsigned long crash_start; + unsigned long crash_end; + int reserved_mem; + int reserved_mem_addr_cells; + int reserved_mem_size_cells; +}; + +/* Simplified build-specific string for starting entropy. */ +static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; + +struct regions __initdata regions; + +static __init void kaslr_get_cmdline(void *fdt) +{ + int node = fdt_path_offset(fdt, "/chosen"); + + early_init_dt_scan_chosen(node, "chosen", 1, boot_command_line); +} + +static unsigned long __init rotate_xor(unsigned long hash, const void *area, + size_t size) +{ + size_t i; + const unsigned long *ptr = area; + + for (i = 0; i < size / sizeof(hash); i++) { + /* Rotate by odd number of bits and XOR. */ + hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); + hash ^= ptr[i]; + } + + return hash; +} + +/* Attempt to create a simple starting entropy. This can make it defferent for + * every build but it is still not enough. Stronger entropy should + * be added to make it change for every boot. + */ +static unsigned long __init get_boot_seed(void *fdt) +{ + unsigned long hash = 0; + + hash = rotate_xor(hash, build_str, sizeof(build_str)); + hash = rotate_xor(hash, fdt, fdt_totalsize(fdt)); + + return hash; +} + +static __init u64 get_kaslr_seed(void *fdt) +{ + int node, len; + fdt64_t *prop; + u64 ret; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return 0; + + prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len); + if (!prop || len != sizeof(u64)) + return 0; + + ret = fdt64_to_cpu(*prop); + *prop = 0; + return ret; +} + +static __init bool regions_overlap(u32 s1, u32 e1, u32 s2, u32 e2) +{ + return e1 >= s2 && e2 >= s1; +} + +static __init bool overlaps_reserved_region(const void *fdt, u32 start, + u32 end) +{ + int subnode, len, i; + u64 base, size; + + /* check for overlap with /memreserve/ entries */ + for (i = 0; i < fdt_num_mem_rsv(fdt); i++) { + if (fdt_get_mem_rsv(fdt, i, &base, &size) < 0) + continue; + if (regions_overlap(start, end, base, base + size)) + return true; + } + + if (regions.reserved_mem < 0) + return false; + + /* check for overlap with static reservations in /reserved-memory */ + for (subnode = fdt_first_subnode(fdt, regions.reserved_mem); + subnode >= 0; + subnode = fdt_next_subnode(fdt, subnode)) { + const fdt32_t *reg; + u64 rsv_end; + + len = 0; + reg = fdt_getprop(fdt, subnode, "reg", &len); + while (len >= (regions.reserved_mem_addr_cells + + regions.reserved_mem_size_cells)) { + base = fdt32_to_cpu(reg[0]); + if (regions.reserved_mem_addr_cells == 2) + base = (base << 32) | fdt32_to_cpu(reg[1]); + + reg += regions.reserved_mem_addr_cells; + len -= 4 * regions.reserved_mem_addr_cells; + + size = fdt32_to_cpu(reg[0]); + if (regions.reserved_mem_size_cells == 2) + size = (size << 32) | fdt32_to_cpu(reg[1]); + + reg += regions.reserved_mem_size_cells; + len -= 4 * regions.reserved_mem_size_cells; + + if (base >= regions.pa_end) + continue; + + rsv_end = min(base + size, (u64)U32_MAX); + + if (regions_overlap(start, end, base, rsv_end)) + return true; + } + } + return false; +} + +static __init bool overlaps_region(const void *fdt, u32 start, + u32 end) +{ + if (regions_overlap(start, end, __pa(_stext), __pa(_end))) + return true; + + if (regions_overlap(start, end, regions.dtb_start, + regions.dtb_end)) + return true; + + if (regions_overlap(start, end, regions.initrd_start, + regions.initrd_end)) + return true; + + if (regions_overlap(start, end, regions.crash_start, + regions.crash_end)) + return true; + + return overlaps_reserved_region(fdt, start, end); +} + +static void __init get_crash_kernel(void *fdt, unsigned long size) +{ +#ifdef CONFIG_CRASH_CORE + unsigned long long crash_size, crash_base; + int ret; + + ret = parse_crashkernel(boot_command_line, size, &crash_size, + &crash_base); + if (ret != 0 || crash_size == 0) + return; + if (crash_base == 0) + crash_base = KDUMP_KERNELBASE; + + regions.crash_start = (unsigned long)crash_base; + regions.crash_end = (unsigned long)(crash_base + crash_size); + + pr_debug("crash_base=0x%llx crash_size=0x%llx\n", crash_base, crash_size); +#endif +} + +static void __init get_initrd_range(void *fdt) +{ + u64 start, end; + int node, len; + const __be32 *prop; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return; + + prop = fdt_getprop(fdt, node, "linux,initrd-start", &len); + if (!prop) + return; + start = of_read_number(prop, len / 4); + + prop = fdt_getprop(fdt, node, "linux,initrd-end", &len); + if (!prop) + return; + end = of_read_number(prop, len / 4); + + regions.initrd_start = (unsigned long)start; + regions.initrd_end = (unsigned long)end; + + pr_debug("initrd_start=0x%llx initrd_end=0x%llx\n", start, end); +} + +static __init unsigned long get_usable_address(const void *fdt, + unsigned long start, + unsigned long offset) +{ + unsigned long pa; + unsigned long pa_end; + + for (pa = offset; (long)pa > (long)start; pa -= SZ_16K) { + pa_end = pa + regions.kernel_size; + if (overlaps_region(fdt, pa, pa_end)) + continue; + + return pa; + } + return 0; +} + +static __init void get_cell_sizes(const void *fdt, int node, int *addr_cells, + int *size_cells) +{ + const int *prop; + int len; + + /* + * Retrieve the #address-cells and #size-cells properties + * from the 'node', or use the default if not provided. + */ + *addr_cells = *size_cells = 1; + + prop = fdt_getprop(fdt, node, "#address-cells", &len); + if (len == 4) + *addr_cells = fdt32_to_cpu(*prop); + prop = fdt_getprop(fdt, node, "#size-cells", &len); + if (len == 4) + *size_cells = fdt32_to_cpu(*prop); +} + +static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long index, + unsigned long offset) +{ + unsigned long koffset = 0; + unsigned long start; + + while ((long)index >= 0) { + offset = memstart_addr + index * SZ_64M + offset; + start = memstart_addr + index * SZ_64M; + koffset = get_usable_address(dt_ptr, start, offset); + if (koffset) + break; + index--; + } + + if (koffset != 0) + koffset -= memstart_addr; + + return koffset; +} + +static inline __init bool kaslr_disabled(void) +{ + return strstr(boot_command_line, "nokaslr") != NULL; +} + +static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size, + unsigned long kernel_sz) +{ + unsigned long offset, random; + unsigned long ram, linear_sz; + u64 seed; + unsigned long index; + + kaslr_get_cmdline(dt_ptr); + if (kaslr_disabled()) + return 0; + + random = get_boot_seed(dt_ptr); + + seed = get_tb() << 32; + seed ^= get_tb(); + random = rotate_xor(random, &seed, sizeof(seed)); + + /* + * Retrieve (and wipe) the seed from the FDT + */ + seed = get_kaslr_seed(dt_ptr); + if (seed) + random = rotate_xor(random, &seed, sizeof(seed)); + else + pr_warn("KASLR: No safe seed for randomizing the kernel base.\n"); + + ram = min_t(phys_addr_t, __max_low_memory, size); + ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true); + linear_sz = min_t(unsigned long, ram, SZ_512M); + + /* If the linear size is smaller than 64M, do not randmize */ + if (linear_sz < SZ_64M) + return 0; + + /* check for a reserved-memory node and record its cell sizes */ + regions.reserved_mem = fdt_path_offset(dt_ptr, "/reserved-memory"); + if (regions.reserved_mem >= 0) + get_cell_sizes(dt_ptr, regions.reserved_mem, + ®ions.reserved_mem_addr_cells, + ®ions.reserved_mem_size_cells); + + regions.pa_start = memstart_addr; + regions.pa_end = memstart_addr + linear_sz; + regions.dtb_start = __pa(dt_ptr); + regions.dtb_end = __pa(dt_ptr) + fdt_totalsize(dt_ptr); + regions.kernel_size = kernel_sz; + + get_initrd_range(dt_ptr); + get_crash_kernel(dt_ptr, ram); + + /* + * Decide which 64M we want to start + * Only use the low 8 bits of the random seed + */ + index = random & 0xFF; + index %= linear_sz / SZ_64M; + + /* Decide offset inside 64M */ + offset = random % (SZ_64M - kernel_sz); + offset = round_down(offset, SZ_16K); + + return kaslr_legal_offset(dt_ptr, index, offset); +} + +/* + * To see if we need to relocate the kernel to a random offset + * void *dt_ptr - address of the device tree + * phys_addr_t size - size of the first memory block + */ +notrace void __init kaslr_early_init(void *dt_ptr, phys_addr_t size) +{ + unsigned long tlb_virt; + phys_addr_t tlb_phys; + unsigned long offset; + unsigned long kernel_sz; + + kernel_sz = (unsigned long)_end - (unsigned long)_stext; + + offset = kaslr_choose_location(dt_ptr, size, kernel_sz); + if (offset == 0) + return; + + kernstart_virt_addr += offset; + kernstart_addr += offset; + + is_second_reloc = 1; + + if (offset >= SZ_64M) { + tlb_virt = round_down(kernstart_virt_addr, SZ_64M); + tlb_phys = round_down(kernstart_addr, SZ_64M); + + /* Create kernel map to relocate in */ + create_kaslr_tlb_entry(1, tlb_virt, tlb_phys); + } + + /* Copy the kernel to it's new location and run */ + memcpy((void *)kernstart_virt_addr, (void *)_stext, kernel_sz); + flush_icache_range(kernstart_virt_addr, kernstart_virt_addr + kernel_sz); + + reloc_kernel_entry(dt_ptr, kernstart_virt_addr); +} + +void __init kaslr_late_init(void) +{ + /* If randomized, clear the original kernel */ + if (kernstart_virt_addr != KERNELBASE) { + unsigned long kernel_sz; + + kernel_sz = (unsigned long)_end - kernstart_virt_addr; + memzero_explicit((void *)KERNELBASE, kernel_sz); + } +} diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 8ec5dfb65b2e..73b84166d06a 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -117,10 +117,7 @@ void __init mapin_ram(void) if (base >= top) continue; base = mmu_mapin_ram(base, top); - if (IS_ENABLED(CONFIG_BDI_SWITCH)) - __mapin_ram_chunk(reg->base, top); - else - __mapin_ram_chunk(base, top); + __mapin_ram_chunk(base, top); } } |