From 965278dcb8ab0b1f666cc47937933c4be4aea48d Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Wed, 13 May 2015 15:07:54 +0100
Subject: ARM: 8356/1: mm: handle non-pmd-aligned end of RAM

At boot time we round the memblock limit down to section size in an
attempt to ensure that we will have mapped this RAM with section
mappings prior to allocating from it. When mapping RAM we iterate over
PMD-sized chunks, creating these section mappings.

Section mappings are only created when the end of a chunk is aligned to
section size. Unfortunately, with classic page tables (where PMD_SIZE
is 2 * SECTION_SIZE) this means that if a chunk is between 1M and 2M in
size the first 1M will not be mapped despite having been accounted for
in the memblock limit. This has been observed to result in page tables
being allocated from unmapped memory, causing boot-time hangs.

This patch modifies the memblock limit rounding to always round down to
PMD_SIZE instead of SECTION_SIZE. For classic MMU this means that we
will round the memblock limit down to a 2M boundary, matching the
limits on section mappings, and preventing allocations from unmapped
memory. For LPAE there should be no change as PMD_SIZE == SECTION_SIZE.

Signed-off-by: Mark Rutland
Reported-by: Stefan Agner
Tested-by: Stefan Agner
Acked-by: Laura Abbott
Tested-by: Hans de Goede
Cc: Catalin Marinas
Cc: Steve Capper
Cc: stable@vger.kernel.org
Signed-off-by: Russell King
---
 arch/arm/mm/mmu.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch/arm/mm/mmu.c')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4e6ef896c619..7186382672b5 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1112,22 +1112,22 @@ void __init sanity_check_meminfo(void)
 			}

 			/*
-			 * Find the first non-section-aligned page, and point
+			 * Find the first non-pmd-aligned page, and point
 			 * memblock_limit at it. This relies on rounding the
-			 * limit down to be section-aligned, which happens at
-			 * the end of this function.
+			 * limit down to be pmd-aligned, which happens at the
+			 * end of this function.
 			 *
 			 * With this algorithm, the start or end of almost any
-			 * bank can be non-section-aligned. The only exception
-			 * is that the start of the bank 0 must be section-
+			 * bank can be non-pmd-aligned. The only exception is
+			 * that the start of the bank 0 must be section-
 			 * aligned, since otherwise memory would need to be
 			 * allocated when mapping the start of bank 0, which
 			 * occurs before any free memory is mapped.
 			 */
 			if (!memblock_limit) {
-				if (!IS_ALIGNED(block_start, SECTION_SIZE))
+				if (!IS_ALIGNED(block_start, PMD_SIZE))
 					memblock_limit = block_start;
-				else if (!IS_ALIGNED(block_end, SECTION_SIZE))
+				else if (!IS_ALIGNED(block_end, PMD_SIZE))
 					memblock_limit = arm_lowmem_limit;
 			}

@@ -1137,12 +1137,12 @@ void __init sanity_check_meminfo(void)
 	high_memory = __va(arm_lowmem_limit - 1) + 1;

 	/*
-	 * Round the memblock limit down to a section size. This
+	 * Round the memblock limit down to a pmd size. This
 	 * helps to ensure that we will allocate memory from the
-	 * last full section, which should be mapped.
+	 * last full pmd, which should be mapped.
 	 */
 	if (memblock_limit)
-		memblock_limit = round_down(memblock_limit, SECTION_SIZE);
+		memblock_limit = round_down(memblock_limit, PMD_SIZE);
 	if (!memblock_limit)
 		memblock_limit = arm_lowmem_limit;

-- cgit
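To see why the rounding granularity matters, the following standalone
user-space sketch reproduces the arithmetic of the patch. The constants
mirror the classic (non-LPAE) ARM values, and round_down() is a local
re-implementation rather than the kernel helper; the 255 MiB bank end is
just an example of an address that is section-aligned but not
pmd-aligned.

/* Illustration only: rounding the limit to SECTION_SIZE can leave it
 * inside a PMD whose first megabyte was never section-mapped, while
 * rounding to PMD_SIZE always lands on a fully mapped boundary. */
#include <stdio.h>
#include <stdint.h>

#define SECTION_SIZE (1ULL << 20)        /* 1 MiB, classic page tables */
#define PMD_SIZE     (2 * SECTION_SIZE)  /* 2 MiB, classic page tables */

static uint64_t round_down(uint64_t x, uint64_t align)
{
	return x & ~(align - 1);
}

int main(void)
{
	/* A RAM bank ending at 255 MiB: section-aligned, not pmd-aligned. */
	uint64_t ram_end = 255 * SECTION_SIZE;

	/* Old behaviour: limit stays at 255 MiB, but the 254..255 MiB
	 * chunk was never section-mapped, so early page table
	 * allocations could land in unmapped memory. */
	printf("limit rounded to SECTION_SIZE: %#llx\n",
	       (unsigned long long)round_down(ram_end, SECTION_SIZE));

	/* New behaviour: limit drops to 254 MiB, the last full PMD,
	 * which is guaranteed to be mapped. */
	printf("limit rounded to PMD_SIZE:     %#llx\n",
	       (unsigned long long)round_down(ram_end, PMD_SIZE));
	return 0;
}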
From c8ca2b4b29282605698968d15667939b23e208e2 Mon Sep 17 00:00:00 2001
From: Russell King
Date: Sat, 4 Apr 2015 09:53:38 +0100
Subject: ARM: keystone2: move update of the phys-to-virt constants into generic code

Make the init_meminfo function return the offset to be applied to the
phys-to-virt translation constants. This allows us to move the update
into generic code, along with the requirements for this update.

This avoids platforms having to know the details of the phys-to-virt
translation support.

Acked-by: Santosh Shilimkar
Tested-by: Murali Karicheri
Signed-off-by: Russell King
---
 arch/arm/mm/mmu.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

(limited to 'arch/arm/mm/mmu.c')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4e6ef896c619..38ccbdf6c322 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1387,7 +1387,7 @@ static void __init map_lowmem(void)
 	}
 }

-#ifdef CONFIG_ARM_LPAE
+#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_ARM_PATCH_PHYS_VIRT)
 /*
  * early_paging_init() recreates boot time page table setup, allowing machines
  * to switch over to a high (>4G) address space on LPAE systems
@@ -1397,6 +1397,7 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 {
 	pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags;
 	unsigned long map_start, map_end;
+	long long offset;
 	pgd_t *pgd0, *pgdk;
 	pud_t *pud0, *pudk, *pud_start;
 	pmd_t *pmd0, *pmdk;
@@ -1419,7 +1420,13 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 	pudk = pud_offset(pgdk, map_start);
 	pmdk = pmd_offset(pudk, map_start);

-	mdesc->init_meminfo();
+	offset = mdesc->init_meminfo();
+	if (offset == 0)
+		return;
+
+	/* Re-set the phys pfn offset, and the pv offset */
+	__pv_offset += offset;
+	__pv_phys_pfn_offset += PFN_DOWN(offset);

 	/* Run the patch stub to update the constants */
 	fixup_pv_table(&__pv_table_begin,
@@ -1502,8 +1509,19 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 void __init early_paging_init(const struct machine_desc *mdesc,
 			      struct proc_info_list *procinfo)
 {
-	if (mdesc->init_meminfo)
-		mdesc->init_meminfo();
+	long long offset;
+
+	if (!mdesc->init_meminfo)
+		return;
+
+	offset = mdesc->init_meminfo();
+	if (offset == 0)
+		return;
+
+	pr_crit("Physical address space modification is only to support Keystone2.\n");
+	pr_crit("Please enable ARM_LPAE and ARM_PATCH_PHYS_VIRT support to use this\n");
+	pr_crit("feature. Your kernel may crash now, have a good day.\n");
+	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
 }

 #endif
-- cgit
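The bookkeeping this patch centralises is small: the platform callback
returns a physical-address delta, and generic code folds it into the
phys/virt translation constants. The sketch below is a user-space
illustration of that arithmetic, not the kernel implementation; the
Keystone 2-style addresses (32-bit alias at 0x80000000, real RAM at
0x8_0000_0000) are used purely as example values.

/* Sketch of the offset handling added by this patch. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)

static uint64_t __pv_offset;		/* phys minus virt */
static uint64_t __pv_phys_pfn_offset;	/* pfn of PHYS_OFFSET */

/* Illustrative stand-in for mdesc->init_meminfo(): report the delta
 * from the low 32-bit alias to the true physical location of RAM. */
static long long example_init_meminfo(void)
{
	return 0x800000000LL - 0x80000000LL;
}

int main(void)
{
	long long offset = example_init_meminfo();

	if (offset == 0)
		return 0;

	/* Mirror of the generic code in the hunk above. */
	__pv_offset += offset;
	__pv_phys_pfn_offset += PFN_DOWN(offset);

	printf("__pv_offset          = %#llx\n",
	       (unsigned long long)__pv_offset);
	printf("__pv_phys_pfn_offset = %#llx\n",
	       (unsigned long long)__pv_phys_pfn_offset);
	return 0;
}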
From 39b74fe82f734ac0bec726733805bb7d46c95933 Mon Sep 17 00:00:00 2001
From: Russell King
Date: Sat, 4 Apr 2015 10:25:28 +0100
Subject: ARM: keystone2: move address space switch printk into generic code

There is no point in platform code doing this; let's move it into the
generic code so it doesn't get duplicated.

Acked-by: Santosh Shilimkar
Tested-by: Murali Karicheri
Signed-off-by: Russell King
---
 arch/arm/mm/mmu.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/arm/mm/mmu.c')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 38ccbdf6c322..91262d28a4c9 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1424,6 +1424,9 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 	if (offset == 0)
 		return;

+	pr_info("Switching physical address space to 0x%08llx\n",
+		(u64)PHYS_OFFSET + offset);
+
 	/* Re-set the phys pfn offset, and the pv offset */
 	__pv_offset += offset;
 	__pv_phys_pfn_offset += PFN_DOWN(offset);
-- cgit

From c0b759d87eab301af0380f5459057656178e78cf Mon Sep 17 00:00:00 2001
From: Russell King
Date: Sat, 4 Apr 2015 10:01:10 +0100
Subject: ARM: keystone2: rename init_meminfo to pv_fixup

The init_meminfo() method is not about initialising meminfo - it's
about fixing up the physical to virtual translation so that we use a
different physical address space, possibly above the 4GB physical
address space. Therefore, the name "init_meminfo()" is confusing.

Rename it to pv_fixup() instead.

Acked-by: Santosh Shilimkar
Tested-by: Murali Karicheri
Signed-off-by: Russell King
---
 arch/arm/mm/mmu.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/arm/mm/mmu.c')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 91262d28a4c9..0e5ed87221dd 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1404,7 +1404,7 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 	phys_addr_t phys;
 	int i;

-	if (!(mdesc->init_meminfo))
+	if (!mdesc->pv_fixup)
 		return;

 	/* remap kernel code and data */
@@ -1420,7 +1420,7 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 	pudk = pud_offset(pgdk, map_start);
 	pmdk = pmd_offset(pudk, map_start);

-	offset = mdesc->init_meminfo();
+	offset = mdesc->pv_fixup();
 	if (offset == 0)
 		return;

@@ -1514,10 +1514,10 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 {
 	long long offset;

-	if (!mdesc->init_meminfo)
+	if (!mdesc->pv_fixup)
 		return;

-	offset = mdesc->init_meminfo();
+	offset = mdesc->pv_fixup();
 	if (offset == 0)
 		return;

-- cgit
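After these two patches, all a platform supplies is a pv_fixup() hook
returning the physical-address delta; the message printing and constant
patching live in the generic early_paging_init(). The sketch below is a
trimmed, user-space stand-in for the ARM machine descriptor, not a
buildable board file; the struct name and the Keystone 2-style address
are illustrative assumptions.

/* Platform-side sketch of the renamed hook. */
#include <stdio.h>

struct machine_desc_example {
	const char *name;
	long long (*pv_fixup)(void);
};

/* Move RAM from its 32-bit alias to the true location above 4GiB. */
static long long keystone_pv_fixup_example(void)
{
	return 0x800000000LL - 0x80000000LL;
}

static const struct machine_desc_example keystone_example = {
	.name     = "Keystone2 (example)",
	.pv_fixup = keystone_pv_fixup_example,
};

int main(void)
{
	long long offset = keystone_example.pv_fixup
			   ? keystone_example.pv_fixup() : 0;

	if (offset)
		printf("%s: pv_fixup() returned %#llx\n",
		       keystone_example.name, (unsigned long long)offset);
	return 0;
}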
From d8dc7fbd53eeb329a1dda5a19df7058b9c1c413e Mon Sep 17 00:00:00 2001
From: Russell King
Date: Sat, 4 Apr 2015 16:58:38 +0100
Subject: ARM: re-implement physical address space switching

Re-implement the physical address space switching to be architecturally
compliant. This involves flushing the caches, disabling the MMU, and
only then updating the page tables. Once that is complete, the system
can be brought back up again.

Since we disable the MMU, we need to do the update in assembly code.
Luckily, the entries which need updating are fairly trivial, and are
all set up by the early assembly code. We can merely adjust each entry
by the delta required.

Not only does this fix the code to be architecturally compliant, but it
fixes a couple of bugs too:

1. The original code would only ever update the first L2 entry covering
   a fraction of the kernel; the remainder were left untouched.
2. The L2 entries covering the DTB blob were likewise untouched.

This solution fixes up all entries.

Tested-by: Murali Karicheri
Signed-off-by: Russell King
---
 arch/arm/mm/mmu.c | 124 ++++++++++++++++++------------------------------------
 1 file changed, 40 insertions(+), 84 deletions(-)

(limited to 'arch/arm/mm/mmu.c')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 0e5ed87221dd..a5d3180a6886 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1387,7 +1387,11 @@ static void __init map_lowmem(void)
 	}
 }

-#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_ARM_PATCH_PHYS_VIRT)
+#ifdef CONFIG_ARM_PV_FIXUP
+extern unsigned long __atags_pointer;
+typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata);
+pgtables_remap lpae_pgtables_remap_asm;
+
 /*
  * early_paging_init() recreates boot time page table setup, allowing machines
  * to switch over to a high (>4G) address space on LPAE systems
@@ -1395,35 +1399,30 @@ static void __init map_lowmem(void)
 void __init early_paging_init(const struct machine_desc *mdesc,
 			      struct proc_info_list *procinfo)
 {
-	pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags;
-	unsigned long map_start, map_end;
+	pgtables_remap *lpae_pgtables_remap;
+	unsigned long pa_pgd;
+	unsigned int cr, ttbcr;
 	long long offset;
-	pgd_t *pgd0, *pgdk;
-	pud_t *pud0, *pudk, *pud_start;
-	pmd_t *pmd0, *pmdk;
-	phys_addr_t phys;
-	int i;
+	void *boot_data;

 	if (!mdesc->pv_fixup)
 		return;

-	/* remap kernel code and data */
-	map_start = init_mm.start_code & PMD_MASK;
-	map_end = ALIGN(init_mm.brk, PMD_SIZE);
-
-	/* get a handle on things... */
-	pgd0 = pgd_offset_k(0);
-	pud_start = pud0 = pud_offset(pgd0, 0);
-	pmd0 = pmd_offset(pud0, 0);
-
-	pgdk = pgd_offset_k(map_start);
-	pudk = pud_offset(pgdk, map_start);
-	pmdk = pmd_offset(pudk, map_start);
-
 	offset = mdesc->pv_fixup();
 	if (offset == 0)
 		return;

+	/*
+	 * Get the address of the remap function in the 1:1 identity
+	 * mapping setup by the early page table assembly code. We
+	 * must get this prior to the pv update. The following barrier
+	 * ensures that this is complete before we fixup any P:V offsets.
+	 */
+	lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm);
+	pa_pgd = __pa(swapper_pg_dir);
+	boot_data = __va(__atags_pointer);
+	barrier();
+
 	pr_info("Switching physical address space to 0x%08llx\n",
 		(u64)PHYS_OFFSET + offset);
@@ -1436,75 +1435,32 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 		       (&__pv_table_end - &__pv_table_begin) << 2);

 	/*
-	 * Cache cleaning operations for self-modifying code
-	 * We should clean the entries by MVA but running a
-	 * for loop over every pv_table entry pointer would
-	 * just complicate the code.
-	 */
-	flush_cache_louis();
-	dsb(ishst);
-	isb();
-
-	/*
-	 * FIXME: This code is not architecturally compliant: we modify
-	 * the mappings in-place, indeed while they are in use by this
-	 * very same code. This may lead to unpredictable behaviour of
-	 * the CPU.
-	 *
-	 * Even modifying the mappings in a separate page table does
-	 * not resolve this.
-	 *
-	 * The architecture strongly recommends that when a mapping is
-	 * changed, that it is changed by first going via an invalid
-	 * mapping and back to the new mapping. This is to ensure that
-	 * no TLB conflicts (caused by the TLB having more than one TLB
-	 * entry match a translation) can occur. However, doing that
-	 * here will result in unmapping the code we are running.
-	 */
-	pr_warn("WARNING: unsafe modification of in-place page tables - tainting kernel\n");
-	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
-
-	/*
-	 * Remap level 1 table. This changes the physical addresses
-	 * used to refer to the level 2 page tables to the high
-	 * physical address alias, leaving everything else the same.
-	 */
-	for (i = 0; i < PTRS_PER_PGD; pud0++, i++) {
-		set_pud(pud0,
-			__pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER));
-		pmd0 += PTRS_PER_PMD;
-	}
-
-	/*
-	 * Remap the level 2 table, pointing the mappings at the high
-	 * physical address alias of these pages.
-	 */
-	phys = __pa(map_start);
-	do {
-		*pmdk++ = __pmd(phys | pmdprot);
-		phys += PMD_SIZE;
-	} while (phys < map_end);
-
-	/*
-	 * Ensure that the above updates are flushed out of the cache.
-	 * This is not strictly correct; on a system where the caches
-	 * are coherent with each other, but the MMU page table walks
-	 * may not be coherent, flush_cache_all() may be a no-op, and
-	 * this will fail.
+	 * We changing not only the virtual to physical mapping, but also
+	 * the physical addresses used to access memory. We need to flush
+	 * all levels of cache in the system with caching disabled to
+	 * ensure that all data is written back, and nothing is prefetched
+	 * into the caches. We also need to prevent the TLB walkers
+	 * allocating into the caches too. Note that this is ARMv7 LPAE
+	 * specific.
 	 */
+	cr = get_cr();
+	set_cr(cr & ~(CR_I | CR_C));
+	asm("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr));
+	asm volatile("mcr p15, 0, %0, c2, c0, 2"
+		: : "r" (ttbcr & ~(3 << 8 | 3 << 10)));
 	flush_cache_all();

 	/*
-	 * Re-write the TTBR values to point them at the high physical
-	 * alias of the page tables. We expect __va() will work on
-	 * cpu_get_pgd(), which returns the value of TTBR0.
+	 * Fixup the page tables - this must be in the idmap region as
+	 * we need to disable the MMU to do this safely, and hence it
+	 * needs to be assembly. It's fairly simple, as we're using the
+	 * temporary tables setup by the initial assembly code.
 	 */
-	cpu_switch_mm(pgd0, &init_mm);
-	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
+	lpae_pgtables_remap(offset, pa_pgd, boot_data);

-	/* Finally flush any stale TLB values. */
-	local_flush_bp_all();
-	local_flush_tlb_all();
+	/* Re-enable the caches and cacheable TLB walks */
+	asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr));
+	set_cr(cr);
 }

 #else
-- cgit
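The new sequence brackets the rewrite: save SCTLR and TTBCR, turn the
caches and cacheable translation table walks off, flush, run the remap
helper through the identity map with the MMU disabled, then restore.
The user-space sketch below models only that structure; the cp15
accessors are stubbed with plain variables, and the SCTLR bit positions
(C at bit 2, I at bit 12) and the TTBCR IRGN0/ORGN0 fields at bits 8-9
and 10-11 are assumptions of this sketch, taken from my reading of the
ARM constants rather than from this diff.

/* Structural sketch of the disable/flush/remap/restore bracket. */
#include <stdio.h>

#define CR_C	(1u << 2)	/* SCTLR.C: data cache enable (assumed) */
#define CR_I	(1u << 12)	/* SCTLR.I: icache enable (assumed) */

static unsigned int sctlr = CR_C | CR_I;
static unsigned int ttbcr = (1u << 8) | (1u << 10);	/* cacheable walks */

static unsigned int get_cr(void) { return sctlr; }
static void set_cr(unsigned int v) { sctlr = v; printf("SCTLR = %#x\n", v); }
static unsigned int get_ttbcr(void) { return ttbcr; }
static void set_ttbcr(unsigned int v) { ttbcr = v; printf("TTBCR = %#x\n", v); }

static void flush_cache_all(void) { printf("flush_cache_all()\n"); }
static void remap_page_tables(void) { printf("pgtables remap (MMU off, via idmap)\n"); }

int main(void)
{
	unsigned int cr = get_cr();
	unsigned int tcr = get_ttbcr();

	/* Disable the caches and make table walks non-cacheable so
	 * nothing is written back or prefetched during the switch. */
	set_cr(cr & ~(CR_I | CR_C));
	set_ttbcr(tcr & ~(3u << 8 | 3u << 10));
	flush_cache_all();

	/* The real patch does this step in assembly, in the idmap. */
	remap_page_tables();

	/* Restore cacheable walks and the caches. */
	set_ttbcr(tcr);
	set_cr(cr);
	return 0;
}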
From 1221ed10f2a56ecdd8ff75f436f52aca5ba0f1d3 Mon Sep 17 00:00:00 2001
From: Russell King
Date: Sat, 4 Apr 2015 17:25:20 +0100
Subject: ARM: cleanup early_paging_init() calling

Eliminate the needless nommu version of this function, and get rid of
the proc_info_list structure argument - we no longer need this in order
to fix up the page table entries.

Acked-by: Santosh Shilimkar
Tested-by: Murali Karicheri
Signed-off-by: Russell King
---
 arch/arm/mm/mmu.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch/arm/mm/mmu.c')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index a5d3180a6886..5aefa9533684 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1396,8 +1396,7 @@ pgtables_remap lpae_pgtables_remap_asm;
  * early_paging_init() recreates boot time page table setup, allowing machines
  * to switch over to a high (>4G) address space on LPAE systems
  */
-void __init early_paging_init(const struct machine_desc *mdesc,
-			      struct proc_info_list *procinfo)
+void __init early_paging_init(const struct machine_desc *mdesc)
 {
 	pgtables_remap *lpae_pgtables_remap;
 	unsigned long pa_pgd;
@@ -1465,8 +1464,7 @@ void __init early_paging_init(const struct machine_desc *mdesc,

 #else

-void __init early_paging_init(const struct machine_desc *mdesc,
-			      struct proc_info_list *procinfo)
+void __init early_paging_init(const struct machine_desc *mdesc)
 {
 	long long offset;

-- cgit
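With the nommu stub gone and the unused proc_info_list argument
dropped, callers only hand over the machine descriptor. The fragment
below is a hypothetical, user-space illustration of the call-site shape
after this cleanup; the real caller lives outside arch/arm/mm/mmu.c and
is not part of this series, and the names here are stand-ins.

/* Caller-side sketch of the simplified interface. */
#include <stddef.h>
#include <stdio.h>

struct machine_desc_example {
	long long (*pv_fixup)(void);
};

/* One implementation, one argument. */
static void early_paging_init_example(const struct machine_desc_example *mdesc)
{
	if (!mdesc || !mdesc->pv_fixup)
		return;
	printf("would apply offset %#llx\n",
	       (unsigned long long)mdesc->pv_fixup());
}

int main(void)
{
	/* Previously the call also passed a proc_info_list pointer. */
	early_paging_init_example(NULL);	/* no pv_fixup: nothing to do */
	return 0;
}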