summaryrefslogtreecommitdiff
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c11
-rw-r--r--arch/powerpc/mm/book3s64/hash_native.c38
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c19
-rw-r--r--arch/powerpc/mm/book3s64/pkeys.c10
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c1
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c80
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c8
-rw-r--r--arch/powerpc/mm/fault.c6
-rw-r--r--arch/powerpc/mm/init-common.c7
-rw-r--r--arch/powerpc/mm/init_32.c5
-rw-r--r--arch/powerpc/mm/init_64.c59
-rw-r--r--arch/powerpc/mm/ioremap_32.c1
-rw-r--r--arch/powerpc/mm/ioremap_64.c2
-rw-r--r--arch/powerpc/mm/mem.c203
-rw-r--r--arch/powerpc/mm/mmu_decl.h11
-rw-r--r--arch/powerpc/mm/nohash/8xx.c52
-rw-r--r--arch/powerpc/mm/nohash/Makefile1
-rw-r--r--arch/powerpc/mm/nohash/fsl_booke.c8
-rw-r--r--arch/powerpc/mm/nohash/kaslr_booke.c401
-rw-r--r--arch/powerpc/mm/pgtable_32.c5
20 files changed, 784 insertions, 144 deletions
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 84d5fab94f8f..69b2419accef 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -251,9 +251,18 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
{
unsigned int bl;
int wimgxpp;
- struct ppc_bat *bat = BATS[index];
+ struct ppc_bat *bat;
unsigned long flags = pgprot_val(prot);
+ if (index == -1)
+ index = find_free_bat();
+ if (index == -1) {
+ pr_err("%s: no BAT available for mapping 0x%llx\n", __func__,
+ (unsigned long long)phys);
+ return;
+ }
+ bat = BATS[index];
+
if ((flags & _PAGE_NO_CACHE) ||
(cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0))
flags &= ~_PAGE_COHERENT;
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index 523e42eb11da..d2d8237ea9d5 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -482,19 +482,12 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
return ret;
}
-static long native_hpte_find(unsigned long vpn, int psize, int ssize)
+static long __native_hpte_find(unsigned long want_v, unsigned long slot)
{
struct hash_pte *hptep;
- unsigned long hash;
+ unsigned long hpte_v;
unsigned long i;
- long slot;
- unsigned long want_v, hpte_v;
- hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
- want_v = hpte_encode_avpn(vpn, psize, ssize);
-
- /* Bolted mappings are only ever in the primary group */
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
for (i = 0; i < HPTES_PER_GROUP; i++) {
hptep = htab_address + slot;
@@ -508,6 +501,33 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
return -1;
}
+static long native_hpte_find(unsigned long vpn, int psize, int ssize)
+{
+ unsigned long hpte_group;
+ unsigned long want_v;
+ unsigned long hash;
+ long slot;
+
+ hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
+
+ /*
+ * We try to keep bolted entries always in primary hash
+ * But in some case we can find them in secondary too.
+ */
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot = __native_hpte_find(want_v, hpte_group);
+ if (slot < 0) {
+ /* Try in secondary */
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot = __native_hpte_find(want_v, hpte_group);
+ if (slot < 0)
+ return -1;
+ }
+
+ return slot;
+}
+
/*
* Update the page protection bits. Intended to be used to create
* guard pages for kernel data structures on pages which are bolted
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 6c123760164e..b30435c7d804 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -263,6 +263,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
unsigned long vsid = get_kernel_vsid(vaddr, ssize);
unsigned long vpn = hpt_vpn(vaddr, vsid, ssize);
unsigned long tprot = prot;
+ bool secondary_hash = false;
/*
* If we hit a bad address return error.
@@ -291,13 +292,31 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
BUG_ON(!mmu_hash_ops.hpte_insert);
+repeat:
ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
HPTE_V_BOLTED, psize, psize,
ssize);
+ if (ret == -1) {
+ /*
+ * Try to to keep bolted entries in primary.
+ * Remove non bolted entries and try insert again
+ */
+ ret = mmu_hash_ops.hpte_remove(hpteg);
+ if (ret != -1)
+ ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
+ HPTE_V_BOLTED, psize, psize,
+ ssize);
+ if (ret == -1 && !secondary_hash) {
+ secondary_hash = true;
+ hpteg = ((~hash & htab_hash_mask) * HPTES_PER_GROUP);
+ goto repeat;
+ }
+ }
if (ret < 0)
break;
+ cond_resched();
#ifdef CONFIG_DEBUG_PAGEALLOC
if (debug_pagealloc_enabled() &&
(paddr >> PAGE_SHIFT) < linear_map_hash_count)
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index ae7fca40e5b3..59e0ebbd8036 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -307,16 +307,6 @@ void thread_pkey_regs_init(struct thread_struct *thread)
write_iamr(pkey_iamr_mask);
}
-static inline bool pkey_allows_readwrite(int pkey)
-{
- int pkey_shift = pkeyshift(pkey);
-
- if (!is_pkey_enabled(pkey))
- return true;
-
- return !(read_amr() & ((AMR_RD_BIT|AMR_WR_BIT) << pkey_shift));
-}
-
int __execute_only_pkey(struct mm_struct *mm)
{
return mm->context.execute_only_pkey;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 6ee17d09649c..974109bb85db 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -13,6 +13,7 @@
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/mm.h>
+#include <linux/hugetlb.h>
#include <linux/string_helpers.h>
#include <linux/stop_machine.h>
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 67af871190c6..a95175c0972b 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -732,18 +732,13 @@ local:
}
preempt_enable();
}
+
void radix__flush_all_mm(struct mm_struct *mm)
{
__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);
-void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
-{
- tlb->need_flush_all = 1;
-}
-EXPORT_SYMBOL(radix__flush_tlb_pwc);
-
void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
int psize)
{
@@ -832,8 +827,7 @@ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
- unsigned long start, unsigned long end,
- bool flush_all_sizes)
+ unsigned long start, unsigned long end)
{
unsigned long pid;
@@ -879,26 +873,16 @@ is_local:
}
}
} else {
- bool hflush = flush_all_sizes;
- bool gflush = flush_all_sizes;
+ bool hflush = false;
unsigned long hstart, hend;
- unsigned long gstart, gend;
- if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
- hflush = true;
-
- if (hflush) {
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
hstart = (start + PMD_SIZE - 1) & PMD_MASK;
hend = end & PMD_MASK;
if (hstart == hend)
hflush = false;
- }
-
- if (gflush) {
- gstart = (start + PUD_SIZE - 1) & PUD_MASK;
- gend = end & PUD_MASK;
- if (gstart == gend)
- gflush = false;
+ else
+ hflush = true;
}
if (local) {
@@ -907,9 +891,6 @@ is_local:
if (hflush)
__tlbiel_va_range(hstart, hend, pid,
PMD_SIZE, MMU_PAGE_2M);
- if (gflush)
- __tlbiel_va_range(gstart, gend, pid,
- PUD_SIZE, MMU_PAGE_1G);
asm volatile("ptesync": : :"memory");
} else if (cputlb_use_tlbie()) {
asm volatile("ptesync": : :"memory");
@@ -917,10 +898,6 @@ is_local:
if (hflush)
__tlbie_va_range(hstart, hend, pid,
PMD_SIZE, MMU_PAGE_2M);
- if (gflush)
- __tlbie_va_range(gstart, gend, pid,
- PUD_SIZE, MMU_PAGE_1G);
-
asm volatile("eieio; tlbsync; ptesync": : :"memory");
} else {
_tlbiel_va_range_multicast(mm,
@@ -928,9 +905,6 @@ is_local:
if (hflush)
_tlbiel_va_range_multicast(mm,
hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
- if (gflush)
- _tlbiel_va_range_multicast(mm,
- gstart, gend, pid, PUD_SIZE, MMU_PAGE_1G, false);
}
}
preempt_enable();
@@ -945,7 +919,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif
- __radix__flush_tlb_range(vma->vm_mm, start, end, false);
+ __radix__flush_tlb_range(vma->vm_mm, start, end);
}
EXPORT_SYMBOL(radix__flush_tlb_range);
@@ -1021,53 +995,19 @@ void radix__tlb_flush(struct mmu_gather *tlb)
* that flushes the process table entry cache upon process teardown.
* See the comment for radix in arch_exit_mmap().
*/
- if (tlb->fullmm) {
+ if (tlb->fullmm || tlb->need_flush_all) {
__flush_all_mm(mm, true);
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
- } else if (mm_tlb_flush_nested(mm)) {
- /*
- * If there is a concurrent invalidation that is clearing ptes,
- * then it's possible this invalidation will miss one of those
- * cleared ptes and miss flushing the TLB. If this invalidate
- * returns before the other one flushes TLBs, that can result
- * in it returning while there are still valid TLBs inside the
- * range to be invalidated.
- *
- * See mm/memory.c:tlb_finish_mmu() for more details.
- *
- * The solution to this is ensure the entire range is always
- * flushed here. The problem for powerpc is that the flushes
- * are page size specific, so this "forced flush" would not
- * do the right thing if there are a mix of page sizes in
- * the range to be invalidated. So use __flush_tlb_range
- * which invalidates all possible page sizes in the range.
- *
- * PWC flush probably is not be required because the core code
- * shouldn't free page tables in this path, but accounting
- * for the possibility makes us a bit more robust.
- *
- * need_flush_all is an uncommon case because page table
- * teardown should be done with exclusive locks held (but
- * after locks are dropped another invalidate could come
- * in), it could be optimized further if necessary.
- */
- if (!tlb->need_flush_all)
- __radix__flush_tlb_range(mm, start, end, true);
- else
- radix__flush_all_mm(mm);
-#endif
} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
- if (!tlb->need_flush_all)
+ if (!tlb->freed_tables)
radix__flush_tlb_mm(mm);
else
radix__flush_all_mm(mm);
} else {
- if (!tlb->need_flush_all)
+ if (!tlb->freed_tables)
radix__flush_tlb_range_psize(mm, start, end, psize);
else
radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
}
- tlb->need_flush_all = 0;
}
static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 2a82984356f8..5ab4f868e919 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -104,14 +104,14 @@ static void __dma_sync_page(phys_addr_t paddr, size_t size, int dir)
#endif
}
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
- size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir)
{
__dma_sync_page(paddr, size, dir);
}
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
- size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir)
{
__dma_sync_page(paddr, size, dir);
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 8432c281de92..b5047f9b5dec 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -645,6 +645,7 @@ NOKPROBE_SYMBOL(do_page_fault);
void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
{
const struct exception_table_entry *entry;
+ int is_write = page_fault_is_write(regs->dsisr);
/* Are we prepared to handle this fault? */
if ((entry = search_exception_tables(regs->nip)) != NULL) {
@@ -658,9 +659,10 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
case 0x300:
case 0x380:
case 0xe00:
- pr_alert("BUG: %s at 0x%08lx\n",
+ pr_alert("BUG: %s on %s at 0x%08lx\n",
regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
- "Unable to handle kernel data access", regs->dar);
+ "Unable to handle kernel data access",
+ is_write ? "write" : "read", regs->dar);
break;
case 0x400:
case 0x480:
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index a84da92920f7..42ef7a6e6098 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -21,6 +21,13 @@
#include <asm/pgtable.h>
#include <asm/kup.h>
+phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull;
+EXPORT_SYMBOL_GPL(memstart_addr);
+phys_addr_t kernstart_addr __ro_after_init;
+EXPORT_SYMBOL_GPL(kernstart_addr);
+unsigned long kernstart_virt_addr __ro_after_init = KERNELBASE;
+EXPORT_SYMBOL_GPL(kernstart_virt_addr);
+
static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index b04896a88d79..872df48ae41b 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -56,11 +56,6 @@
phys_addr_t total_memory;
phys_addr_t total_lowmem;
-phys_addr_t memstart_addr = (phys_addr_t)~0ull;
-EXPORT_SYMBOL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL(kernstart_addr);
-
#ifdef CONFIG_RELOCATABLE
/* Used in __va()/__pa() */
long long virt_phys_offset;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 4e08246acd79..4002ced3596f 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -63,38 +63,48 @@
#include <mm/mmu_decl.h>
-phys_addr_t memstart_addr = ~0;
-EXPORT_SYMBOL_GPL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL_GPL(kernstart_addr);
-
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
- * Given an address within the vmemmap, determine the pfn of the page that
- * represents the start of the section it is within. Note that we have to
+ * Given an address within the vmemmap, determine the page that
+ * represents the start of the subsection it is within. Note that we have to
* do this by hand as the proffered address may not be correctly aligned.
* Subtraction of non-aligned pointers produces undefined results.
*/
-static unsigned long __meminit vmemmap_section_start(unsigned long page)
+static struct page * __meminit vmemmap_subsection_start(unsigned long vmemmap_addr)
{
- unsigned long offset = page - ((unsigned long)(vmemmap));
+ unsigned long start_pfn;
+ unsigned long offset = vmemmap_addr - ((unsigned long)(vmemmap));
/* Return the pfn of the start of the section. */
- return (offset / sizeof(struct page)) & PAGE_SECTION_MASK;
+ start_pfn = (offset / sizeof(struct page)) & PAGE_SUBSECTION_MASK;
+ return pfn_to_page(start_pfn);
}
/*
- * Check if this vmemmap page is already initialised. If any section
- * which overlaps this vmemmap page is initialised then this page is
- * initialised already.
+ * Since memory is added in sub-section chunks, before creating a new vmemmap
+ * mapping, the kernel should check whether there is an existing memmap mapping
+ * covering the new subsection added. This is needed because kernel can map
+ * vmemmap area using 16MB pages which will cover a memory range of 16G. Such
+ * a range covers multiple subsections (2M)
+ *
+ * If any subsection in the 16G range mapped by vmemmap is valid we consider the
+ * vmemmap populated (There is a page table entry already present). We can't do
+ * a page table lookup here because with the hash translation we don't keep
+ * vmemmap details in linux page table.
*/
-static int __meminit vmemmap_populated(unsigned long start, int page_size)
+static int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
{
- unsigned long end = start + page_size;
- start = (unsigned long)(pfn_to_page(vmemmap_section_start(start)));
+ struct page *start;
+ unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size;
+ start = vmemmap_subsection_start(vmemmap_addr);
- for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page)))
- if (pfn_valid(page_to_pfn((struct page *)start)))
+ for (; (unsigned long)start < vmemmap_end; start += PAGES_PER_SUBSECTION)
+ /*
+ * pfn valid check here is intended to really check
+ * whether we have any subsection already initialized
+ * in this range.
+ */
+ if (pfn_valid(page_to_pfn(start)))
return 1;
return 0;
@@ -201,6 +211,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
void *p = NULL;
int rc;
+ /*
+ * This vmemmap range is backing different subsections. If any
+ * of that subsection is marked valid, that means we already
+ * have initialized a page table covering this range and hence
+ * the vmemmap range is populated.
+ */
if (vmemmap_populated(start, page_size))
continue;
@@ -290,9 +306,10 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
struct page *page;
/*
- * the section has already be marked as invalid, so
- * vmemmap_populated() true means some other sections still
- * in this page, so skip it.
+ * We have already marked the subsection we are trying to remove
+ * invalid. So if we want to remove the vmemmap range, we
+ * need to make sure there is no subsection marked valid
+ * in this range.
*/
if (vmemmap_populated(start, page_size))
continue;
diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c
index f36121f25243..743e11384dea 100644
--- a/arch/powerpc/mm/ioremap_32.c
+++ b/arch/powerpc/mm/ioremap_32.c
@@ -68,6 +68,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call
/*
* Should check if it is a candidate for a BAT mapping
*/
+ pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
err = early_ioremap_range(ioremap_bot - size, p, size, prot);
if (err)
diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c
index fd29e51700cd..50a99d9684f7 100644
--- a/arch/powerpc/mm/ioremap_64.c
+++ b/arch/powerpc/mm/ioremap_64.c
@@ -81,6 +81,8 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
if (slab_is_available())
return do_ioremap(paligned, offset, size, prot, caller);
+ pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
+
err = early_ioremap_range(ioremap_bot, paligned, size, prot);
if (err)
return NULL;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index be941d382c8d..ad299e72ec30 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -31,6 +31,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/memremap.h>
+#include <linux/dma-direct.h>
#include <asm/pgalloc.h>
#include <asm/prom.h>
@@ -104,6 +105,27 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}
+#define FLUSH_CHUNK_SIZE SZ_1G
+/**
+ * flush_dcache_range_chunked(): Write any modified data cache blocks out to
+ * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE
+ * Does not invalidate the corresponding instruction cache blocks.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ * @chunk: the max size of the chunks
+ */
+static void flush_dcache_range_chunked(unsigned long start, unsigned long stop,
+ unsigned long chunk)
+{
+ unsigned long i;
+
+ for (i = start; i < stop; i += chunk) {
+ flush_dcache_range(i, min(stop, start + chunk));
+ cond_resched();
+ }
+}
+
int __ref arch_add_memory(int nid, u64 start, u64 size,
struct mhp_restrictions *restrictions)
{
@@ -120,7 +142,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
start, start + size, rc);
return -EFAULT;
}
- flush_dcache_range(start, start + size);
return __add_pages(nid, start_pfn, nr_pages, restrictions);
}
@@ -137,7 +158,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
/* Remove htab bolted mappings for this section of memory */
start = (unsigned long)__va(start);
- flush_dcache_range(start, start + size);
+ flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
+
ret = remove_section_mapping(start, start + size);
WARN_ON_ONCE(ret);
@@ -201,10 +223,10 @@ static int __init mark_nonram_nosave(void)
* everything else. GFP_DMA32 page allocations automatically fall back to
* ZONE_DMA.
*
- * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
- * inform the generic DMA mapping code. 32-bit only devices (if not handled
- * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
- * otherwise served by ZONE_DMA.
+ * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the
+ * generic DMA mapping code. 32-bit only devices (if not handled by an IOMMU
+ * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by
+ * ZONE_DMA.
*/
static unsigned long max_zone_pfns[MAX_NR_ZONES];
@@ -216,15 +238,13 @@ void __init paging_init(void)
unsigned long long total_ram = memblock_phys_mem_size();
phys_addr_t top_of_ram = memblock_end_of_DRAM();
-#ifdef CONFIG_PPC32
- unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1);
- unsigned long end = __fix_to_virt(FIX_HOLE);
+#ifdef CONFIG_HIGHMEM
+ unsigned long v = __fix_to_virt(FIX_KMAP_END);
+ unsigned long end = __fix_to_virt(FIX_KMAP_BEGIN);
for (; v < end; v += PAGE_SIZE)
map_kernel_page(v, 0, __pgprot(0)); /* XXX gross */
-#endif
-#ifdef CONFIG_HIGHMEM
map_kernel_page(PKMAP_BASE, 0, __pgprot(0)); /* XXX gross */
pkmap_page_table = virt_to_kpte(PKMAP_BASE);
@@ -237,9 +257,18 @@ void __init paging_init(void)
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(long int)((top_of_ram - total_ram) >> 20));
+ /*
+ * Allow 30-bit DMA for very limited Broadcom wifi chips on many
+ * powerbooks.
+ */
+ if (IS_ENABLED(CONFIG_PPC32))
+ zone_dma_bits = 30;
+ else
+ zone_dma_bits = 31;
+
#ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
- 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
+ 1UL << (zone_dma_bits - PAGE_SHIFT));
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
@@ -318,6 +347,120 @@ void free_initmem(void)
free_initmem_default(POISON_FREE_INITMEM);
}
+/**
+ * flush_coherent_icache() - if a CPU has a coherent icache, flush it
+ * @addr: The base address to use (can be any valid address, the whole cache will be flushed)
+ * Return true if the cache was flushed, false otherwise
+ */
+static inline bool flush_coherent_icache(unsigned long addr)
+{
+ /*
+ * For a snooping icache, we still need a dummy icbi to purge all the
+ * prefetched instructions from the ifetch buffers. We also need a sync
+ * before the icbi to order the the actual stores to memory that might
+ * have modified instructions with the icbi.
+ */
+ if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+ mb(); /* sync */
+ icbi((void *)addr);
+ mb(); /* sync */
+ isync();
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * invalidate_icache_range() - Flush the icache by issuing icbi across an address range
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+static void invalidate_icache_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_icache_shift();
+ unsigned long bytes = l1_icache_bytes();
+ char *addr = (char *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ icbi(addr);
+
+ mb(); /* sync */
+ isync();
+}
+
+/**
+ * flush_icache_range: Write any modified data cache blocks out to memory
+ * and invalidate the corresponding blocks in the instruction cache
+ *
+ * Generic code will call this after writing memory, before executing from it.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+void flush_icache_range(unsigned long start, unsigned long stop)
+{
+ if (flush_coherent_icache(start))
+ return;
+
+ clean_dcache_range(start, stop);
+
+ if (IS_ENABLED(CONFIG_44x)) {
+ /*
+ * Flash invalidate on 44x because we are passed kmapped
+ * addresses and this doesn't work for userspace pages due to
+ * the virtually tagged icache.
+ */
+ iccci((void *)start);
+ mb(); /* sync */
+ isync();
+ } else
+ invalidate_icache_range(start, stop);
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
+/**
+ * flush_dcache_icache_phys() - Flush a page by it's physical address
+ * @physaddr: the physical address of the page
+ */
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+ unsigned long bytes = l1_dcache_bytes();
+ unsigned long nb = PAGE_SIZE / bytes;
+ unsigned long addr = physaddr & PAGE_MASK;
+ unsigned long msr, msr0;
+ unsigned long loop1 = addr, loop2 = addr;
+
+ msr0 = mfmsr();
+ msr = msr0 & ~MSR_DR;
+ /*
+ * This must remain as ASM to prevent potential memory accesses
+ * while the data MMU is disabled
+ */
+ asm volatile(
+ " mtctr %2;\n"
+ " mtmsr %3;\n"
+ " isync;\n"
+ "0: dcbst 0, %0;\n"
+ " addi %0, %0, %4;\n"
+ " bdnz 0b;\n"
+ " sync;\n"
+ " mtctr %2;\n"
+ "1: icbi 0, %1;\n"
+ " addi %1, %1, %4;\n"
+ " bdnz 1b;\n"
+ " sync;\n"
+ " mtmsr %5;\n"
+ " isync;\n"
+ : "+&r" (loop1), "+&r" (loop2)
+ : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0)
+ : "ctr", "memory");
+}
+#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
+
/*
* This is called when a page has been modified by the kernel.
* It just marks the page as not i-cache clean. We do the i-cache
@@ -350,12 +493,46 @@ void flush_dcache_icache_page(struct page *page)
__flush_dcache_icache(start);
kunmap_atomic(start);
} else {
- __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
+ unsigned long addr = page_to_pfn(page) << PAGE_SHIFT;
+
+ if (flush_coherent_icache(addr))
+ return;
+ flush_dcache_icache_phys(addr);
}
#endif
}
EXPORT_SYMBOL(flush_dcache_icache_page);
+/**
+ * __flush_dcache_icache(): Flush a particular page from the data cache to RAM.
+ * Note: this is necessary because the instruction cache does *not*
+ * snoop from the data cache.
+ *
+ * @page: the address of the page to flush
+ */
+void __flush_dcache_icache(void *p)
+{
+ unsigned long addr = (unsigned long)p;
+
+ if (flush_coherent_icache(addr))
+ return;
+
+ clean_dcache_range(addr, addr + PAGE_SIZE);
+
+ /*
+ * We don't flush the icache on 44x. Those have a virtual icache and we
+ * don't have access to the virtual address here (it's not the page
+ * vaddr but where it's mapped in user space). The flushing of the
+ * icache on these is handled elsewhere, when a change in the address
+ * space occurs, before returning to user space.
+ */
+
+ if (cpu_has_feature(MMU_FTR_TYPE_44x))
+ return;
+
+ invalidate_icache_range(addr, addr + PAGE_SIZE);
+}
+
void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
{
clear_page(page);
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index c750ac9ec713..8e99649c24fc 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -139,10 +139,21 @@ extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
extern void adjust_total_lowmem(void);
extern int switch_to_as1(void);
extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
+void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys);
+void reloc_kernel_entry(void *fdt, int addr);
+extern int is_second_reloc;
#endif
extern void loadcam_entry(unsigned int index);
extern void loadcam_multi(int first_idx, int num, int tmp_idx);
+#ifdef CONFIG_RANDOMIZE_BASE
+void kaslr_early_init(void *dt_ptr, phys_addr_t size);
+void kaslr_late_init(void);
+#else
+static inline void kaslr_early_init(void *dt_ptr, phys_addr_t size) {}
+static inline void kaslr_late_init(void) {}
+#endif
+
struct tlbcam {
u32 MAS0;
u32 MAS1;
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 4a06cb342da2..090af2d2d3e4 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -103,6 +103,19 @@ static void mmu_patch_addis(s32 *site, long simm)
patch_instruction_site(site, instr);
}
+void __init mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot)
+{
+ unsigned long s = offset;
+ unsigned long v = PAGE_OFFSET + s;
+ phys_addr_t p = memstart_addr + s;
+
+ for (; s < top; s += PAGE_SIZE) {
+ map_kernel_page(v, p, prot);
+ v += PAGE_SIZE;
+ p += PAGE_SIZE;
+ }
+}
+
unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
unsigned long mapped;
@@ -115,10 +128,20 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0);
} else {
+ unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+
mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
- mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top,
- _ALIGN(__pa(_einittext), 8 << 20));
+ mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, einittext8);
+
+ /*
+ * Populate page tables to:
+ * - have them appear in /sys/kernel/debug/kernel_page_tables
+ * - allow the BDI to find the pages when they are not PINNED
+ */
+ mmu_mapin_ram_chunk(0, einittext8, PAGE_KERNEL_X);
+ mmu_mapin_ram_chunk(einittext8, mapped, PAGE_KERNEL);
+ mmu_mapin_immr();
}
mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped);
@@ -144,18 +167,41 @@ void mmu_mark_initmem_nx(void)
if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23)
mmu_patch_addis(&patch__itlbmiss_linmem_top8,
-((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1)));
- if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+ if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) {
+ unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+ unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M);
+ unsigned long etext = __pa(_etext);
+
mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext));
+
+ /* Update page tables for PTDUMP and BDI */
+ mmu_mapin_ram_chunk(0, einittext8, __pgprot(0));
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
+ mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_TEXT);
+ mmu_mapin_ram_chunk(etext, einittext8, PAGE_KERNEL);
+ } else {
+ mmu_mapin_ram_chunk(0, etext8, PAGE_KERNEL_TEXT);
+ mmu_mapin_ram_chunk(etext8, einittext8, PAGE_KERNEL);
+ }
+ }
}
#ifdef CONFIG_STRICT_KERNEL_RWX
void mmu_mark_rodata_ro(void)
{
+ unsigned long sinittext = __pa(_sinittext);
+ unsigned long etext = __pa(_etext);
+
if (CONFIG_DATA_SHIFT < 23)
mmu_patch_addis(&patch__dtlbmiss_romem_top8,
-__pa(((unsigned long)_sinittext) &
~(LARGE_PAGE_SIZE_8M - 1)));
mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext));
+
+ /* Update page tables for PTDUMP and BDI */
+ mmu_mapin_ram_chunk(0, sinittext, __pgprot(0));
+ mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_ROX);
+ mmu_mapin_ram_chunk(etext, sinittext, PAGE_KERNEL_RO);
}
#endif
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
index 33b6f6f29d3f..0424f6ce5bd8 100644
--- a/arch/powerpc/mm/nohash/Makefile
+++ b/arch/powerpc/mm/nohash/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_40x) += 40x.o
obj-$(CONFIG_44x) += 44x.o
obj-$(CONFIG_PPC_8xx) += 8xx.o
obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke.o
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_booke.o
ifdef CONFIG_HUGETLB_PAGE
obj-$(CONFIG_PPC_FSL_BOOK3E) += book3e_hugetlbpage.o
endif
diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c
index 556e3cd52a35..b4eb06ceb189 100644
--- a/arch/powerpc/mm/nohash/fsl_booke.c
+++ b/arch/powerpc/mm/nohash/fsl_booke.c
@@ -263,11 +263,13 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
int __initdata is_second_reloc;
notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
{
- unsigned long base = KERNELBASE;
+ unsigned long base = kernstart_virt_addr;
+ phys_addr_t size;
kernstart_addr = start;
if (is_second_reloc) {
virt_phys_offset = PAGE_OFFSET - memstart_addr;
+ kaslr_late_init();
return;
}
@@ -291,7 +293,7 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
start &= ~0x3ffffff;
base &= ~0x3ffffff;
virt_phys_offset = base - start;
- early_get_first_memblock_info(__va(dt_ptr), NULL);
+ early_get_first_memblock_info(__va(dt_ptr), &size);
/*
* We now get the memstart_addr, then we should check if this
* address is the same as what the PAGE_OFFSET map to now. If
@@ -316,6 +318,8 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
/* We should never reach here */
panic("Relocation error");
}
+
+ kaslr_early_init(__va(dt_ptr), size);
}
#endif
#endif
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
new file mode 100644
index 000000000000..4a75f2d9bf0e
--- /dev/null
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (C) 2019 Jason Yan <yanaijie@huawei.com>
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/memblock.h>
+#include <linux/libfdt.h>
+#include <linux/crash_core.h>
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/kdump.h>
+#include <mm/mmu_decl.h>
+#include <generated/compile.h>
+#include <generated/utsrelease.h>
+
+struct regions {
+ unsigned long pa_start;
+ unsigned long pa_end;
+ unsigned long kernel_size;
+ unsigned long dtb_start;
+ unsigned long dtb_end;
+ unsigned long initrd_start;
+ unsigned long initrd_end;
+ unsigned long crash_start;
+ unsigned long crash_end;
+ int reserved_mem;
+ int reserved_mem_addr_cells;
+ int reserved_mem_size_cells;
+};
+
+/* Simplified build-specific string for starting entropy. */
+static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+ LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+
+struct regions __initdata regions;
+
+static __init void kaslr_get_cmdline(void *fdt)
+{
+ int node = fdt_path_offset(fdt, "/chosen");
+
+ early_init_dt_scan_chosen(node, "chosen", 1, boot_command_line);
+}
+
+static unsigned long __init rotate_xor(unsigned long hash, const void *area,
+ size_t size)
+{
+ size_t i;
+ const unsigned long *ptr = area;
+
+ for (i = 0; i < size / sizeof(hash); i++) {
+ /* Rotate by odd number of bits and XOR. */
+ hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+ hash ^= ptr[i];
+ }
+
+ return hash;
+}
+
+/* Attempt to create a simple starting entropy. This can make it defferent for
+ * every build but it is still not enough. Stronger entropy should
+ * be added to make it change for every boot.
+ */
+static unsigned long __init get_boot_seed(void *fdt)
+{
+ unsigned long hash = 0;
+
+ hash = rotate_xor(hash, build_str, sizeof(build_str));
+ hash = rotate_xor(hash, fdt, fdt_totalsize(fdt));
+
+ return hash;
+}
+
+static __init u64 get_kaslr_seed(void *fdt)
+{
+ int node, len;
+ fdt64_t *prop;
+ u64 ret;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ return 0;
+
+ prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+ if (!prop || len != sizeof(u64))
+ return 0;
+
+ ret = fdt64_to_cpu(*prop);
+ *prop = 0;
+ return ret;
+}
+
+static __init bool regions_overlap(u32 s1, u32 e1, u32 s2, u32 e2)
+{
+ return e1 >= s2 && e2 >= s1;
+}
+
+static __init bool overlaps_reserved_region(const void *fdt, u32 start,
+ u32 end)
+{
+ int subnode, len, i;
+ u64 base, size;
+
+ /* check for overlap with /memreserve/ entries */
+ for (i = 0; i < fdt_num_mem_rsv(fdt); i++) {
+ if (fdt_get_mem_rsv(fdt, i, &base, &size) < 0)
+ continue;
+ if (regions_overlap(start, end, base, base + size))
+ return true;
+ }
+
+ if (regions.reserved_mem < 0)
+ return false;
+
+ /* check for overlap with static reservations in /reserved-memory */
+ for (subnode = fdt_first_subnode(fdt, regions.reserved_mem);
+ subnode >= 0;
+ subnode = fdt_next_subnode(fdt, subnode)) {
+ const fdt32_t *reg;
+ u64 rsv_end;
+
+ len = 0;
+ reg = fdt_getprop(fdt, subnode, "reg", &len);
+ while (len >= (regions.reserved_mem_addr_cells +
+ regions.reserved_mem_size_cells)) {
+ base = fdt32_to_cpu(reg[0]);
+ if (regions.reserved_mem_addr_cells == 2)
+ base = (base << 32) | fdt32_to_cpu(reg[1]);
+
+ reg += regions.reserved_mem_addr_cells;
+ len -= 4 * regions.reserved_mem_addr_cells;
+
+ size = fdt32_to_cpu(reg[0]);
+ if (regions.reserved_mem_size_cells == 2)
+ size = (size << 32) | fdt32_to_cpu(reg[1]);
+
+ reg += regions.reserved_mem_size_cells;
+ len -= 4 * regions.reserved_mem_size_cells;
+
+ if (base >= regions.pa_end)
+ continue;
+
+ rsv_end = min(base + size, (u64)U32_MAX);
+
+ if (regions_overlap(start, end, base, rsv_end))
+ return true;
+ }
+ }
+ return false;
+}
+
+static __init bool overlaps_region(const void *fdt, u32 start,
+ u32 end)
+{
+ if (regions_overlap(start, end, __pa(_stext), __pa(_end)))
+ return true;
+
+ if (regions_overlap(start, end, regions.dtb_start,
+ regions.dtb_end))
+ return true;
+
+ if (regions_overlap(start, end, regions.initrd_start,
+ regions.initrd_end))
+ return true;
+
+ if (regions_overlap(start, end, regions.crash_start,
+ regions.crash_end))
+ return true;
+
+ return overlaps_reserved_region(fdt, start, end);
+}
+
+static void __init get_crash_kernel(void *fdt, unsigned long size)
+{
+#ifdef CONFIG_CRASH_CORE
+ unsigned long long crash_size, crash_base;
+ int ret;
+
+ ret = parse_crashkernel(boot_command_line, size, &crash_size,
+ &crash_base);
+ if (ret != 0 || crash_size == 0)
+ return;
+ if (crash_base == 0)
+ crash_base = KDUMP_KERNELBASE;
+
+ regions.crash_start = (unsigned long)crash_base;
+ regions.crash_end = (unsigned long)(crash_base + crash_size);
+
+ pr_debug("crash_base=0x%llx crash_size=0x%llx\n", crash_base, crash_size);
+#endif
+}
+
+static void __init get_initrd_range(void *fdt)
+{
+ u64 start, end;
+ int node, len;
+ const __be32 *prop;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ return;
+
+ prop = fdt_getprop(fdt, node, "linux,initrd-start", &len);
+ if (!prop)
+ return;
+ start = of_read_number(prop, len / 4);
+
+ prop = fdt_getprop(fdt, node, "linux,initrd-end", &len);
+ if (!prop)
+ return;
+ end = of_read_number(prop, len / 4);
+
+ regions.initrd_start = (unsigned long)start;
+ regions.initrd_end = (unsigned long)end;
+
+ pr_debug("initrd_start=0x%llx initrd_end=0x%llx\n", start, end);
+}
+
+static __init unsigned long get_usable_address(const void *fdt,
+ unsigned long start,
+ unsigned long offset)
+{
+ unsigned long pa;
+ unsigned long pa_end;
+
+ for (pa = offset; (long)pa > (long)start; pa -= SZ_16K) {
+ pa_end = pa + regions.kernel_size;
+ if (overlaps_region(fdt, pa, pa_end))
+ continue;
+
+ return pa;
+ }
+ return 0;
+}
+
+static __init void get_cell_sizes(const void *fdt, int node, int *addr_cells,
+ int *size_cells)
+{
+ const int *prop;
+ int len;
+
+ /*
+ * Retrieve the #address-cells and #size-cells properties
+ * from the 'node', or use the default if not provided.
+ */
+ *addr_cells = *size_cells = 1;
+
+ prop = fdt_getprop(fdt, node, "#address-cells", &len);
+ if (len == 4)
+ *addr_cells = fdt32_to_cpu(*prop);
+ prop = fdt_getprop(fdt, node, "#size-cells", &len);
+ if (len == 4)
+ *size_cells = fdt32_to_cpu(*prop);
+}
+
+static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long index,
+ unsigned long offset)
+{
+ unsigned long koffset = 0;
+ unsigned long start;
+
+ while ((long)index >= 0) {
+ offset = memstart_addr + index * SZ_64M + offset;
+ start = memstart_addr + index * SZ_64M;
+ koffset = get_usable_address(dt_ptr, start, offset);
+ if (koffset)
+ break;
+ index--;
+ }
+
+ if (koffset != 0)
+ koffset -= memstart_addr;
+
+ return koffset;
+}
+
+static inline __init bool kaslr_disabled(void)
+{
+ return strstr(boot_command_line, "nokaslr") != NULL;
+}
+
+static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size,
+ unsigned long kernel_sz)
+{
+ unsigned long offset, random;
+ unsigned long ram, linear_sz;
+ u64 seed;
+ unsigned long index;
+
+ kaslr_get_cmdline(dt_ptr);
+ if (kaslr_disabled())
+ return 0;
+
+ random = get_boot_seed(dt_ptr);
+
+ seed = get_tb() << 32;
+ seed ^= get_tb();
+ random = rotate_xor(random, &seed, sizeof(seed));
+
+ /*
+ * Retrieve (and wipe) the seed from the FDT
+ */
+ seed = get_kaslr_seed(dt_ptr);
+ if (seed)
+ random = rotate_xor(random, &seed, sizeof(seed));
+ else
+ pr_warn("KASLR: No safe seed for randomizing the kernel base.\n");
+
+ ram = min_t(phys_addr_t, __max_low_memory, size);
+ ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true);
+ linear_sz = min_t(unsigned long, ram, SZ_512M);
+
+ /* If the linear size is smaller than 64M, do not randmize */
+ if (linear_sz < SZ_64M)
+ return 0;
+
+ /* check for a reserved-memory node and record its cell sizes */
+ regions.reserved_mem = fdt_path_offset(dt_ptr, "/reserved-memory");
+ if (regions.reserved_mem >= 0)
+ get_cell_sizes(dt_ptr, regions.reserved_mem,
+ &regions.reserved_mem_addr_cells,
+ &regions.reserved_mem_size_cells);
+
+ regions.pa_start = memstart_addr;
+ regions.pa_end = memstart_addr + linear_sz;
+ regions.dtb_start = __pa(dt_ptr);
+ regions.dtb_end = __pa(dt_ptr) + fdt_totalsize(dt_ptr);
+ regions.kernel_size = kernel_sz;
+
+ get_initrd_range(dt_ptr);
+ get_crash_kernel(dt_ptr, ram);
+
+ /*
+ * Decide which 64M we want to start
+ * Only use the low 8 bits of the random seed
+ */
+ index = random & 0xFF;
+ index %= linear_sz / SZ_64M;
+
+ /* Decide offset inside 64M */
+ offset = random % (SZ_64M - kernel_sz);
+ offset = round_down(offset, SZ_16K);
+
+ return kaslr_legal_offset(dt_ptr, index, offset);
+}
+
+/*
+ * To see if we need to relocate the kernel to a random offset
+ * void *dt_ptr - address of the device tree
+ * phys_addr_t size - size of the first memory block
+ */
+notrace void __init kaslr_early_init(void *dt_ptr, phys_addr_t size)
+{
+ unsigned long tlb_virt;
+ phys_addr_t tlb_phys;
+ unsigned long offset;
+ unsigned long kernel_sz;
+
+ kernel_sz = (unsigned long)_end - (unsigned long)_stext;
+
+ offset = kaslr_choose_location(dt_ptr, size, kernel_sz);
+ if (offset == 0)
+ return;
+
+ kernstart_virt_addr += offset;
+ kernstart_addr += offset;
+
+ is_second_reloc = 1;
+
+ if (offset >= SZ_64M) {
+ tlb_virt = round_down(kernstart_virt_addr, SZ_64M);
+ tlb_phys = round_down(kernstart_addr, SZ_64M);
+
+ /* Create kernel map to relocate in */
+ create_kaslr_tlb_entry(1, tlb_virt, tlb_phys);
+ }
+
+ /* Copy the kernel to it's new location and run */
+ memcpy((void *)kernstart_virt_addr, (void *)_stext, kernel_sz);
+ flush_icache_range(kernstart_virt_addr, kernstart_virt_addr + kernel_sz);
+
+ reloc_kernel_entry(dt_ptr, kernstart_virt_addr);
+}
+
+void __init kaslr_late_init(void)
+{
+ /* If randomized, clear the original kernel */
+ if (kernstart_virt_addr != KERNELBASE) {
+ unsigned long kernel_sz;
+
+ kernel_sz = (unsigned long)_end - kernstart_virt_addr;
+ memzero_explicit((void *)KERNELBASE, kernel_sz);
+ }
+}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 8ec5dfb65b2e..73b84166d06a 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -117,10 +117,7 @@ void __init mapin_ram(void)
if (base >= top)
continue;
base = mmu_mapin_ram(base, top);
- if (IS_ENABLED(CONFIG_BDI_SWITCH))
- __mapin_ram_chunk(reg->base, top);
- else
- __mapin_ram_chunk(base, top);
+ __mapin_ram_chunk(base, top);
}
}