summaryrefslogtreecommitdiff
path: root/arch/arm/boot/compressed/head.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/boot/compressed/head.S')
-rw-r--r--arch/arm/boot/compressed/head.S643
1 files changed, 440 insertions, 203 deletions
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 75189f13cf54..9f406e9c0ea6 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -1,17 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* linux/arch/arm/boot/compressed/head.S
*
* Copyright (C) 1996-2002 Russell King
* Copyright (C) 2004 Hyok S. Choi (MPU support)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
+#include <asm/v7m.h>
+
+#include "efi-header.S"
+
+#ifdef __ARMEB__
+#define OF_DT_MAGIC 0xd00dfeed
+#else
+#define OF_DT_MAGIC 0xedfe0dd0
+#endif
+
+ AR_CLASS( .arch armv7-a )
+ M_CLASS( .arch armv7-m )
- .arch armv7-a
/*
* Debugging stuff
*
@@ -24,21 +32,21 @@
#if defined(CONFIG_DEBUG_ICEDCC)
#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
- .macro loadsp, rb, tmp
+ .macro loadsp, rb, tmp1, tmp2
.endm
- .macro writeb, ch, rb
+ .macro writeb, ch, rb, tmp
mcr p14, 0, \ch, c0, c5, 0
.endm
#elif defined(CONFIG_CPU_XSCALE)
- .macro loadsp, rb, tmp
+ .macro loadsp, rb, tmp1, tmp2
.endm
- .macro writeb, ch, rb
+ .macro writeb, ch, rb, tmp
mcr p14, 0, \ch, c8, c0, 0
.endm
#else
- .macro loadsp, rb, tmp
+ .macro loadsp, rb, tmp1, tmp2
.endm
- .macro writeb, ch, rb
+ .macro writeb, ch, rb, tmp
mcr p14, 0, \ch, c1, c0, 0
.endm
#endif
@@ -47,27 +55,23 @@
#include CONFIG_DEBUG_LL_INCLUDE
- .macro writeb, ch, rb
+ .macro writeb, ch, rb, tmp
+#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
+ waituartcts \tmp, \rb
+#endif
+ waituarttxrdy \tmp, \rb
senduart \ch, \rb
+ busyuart \tmp, \rb
.endm
#if defined(CONFIG_ARCH_SA1100)
- .macro loadsp, rb, tmp
+ .macro loadsp, rb, tmp1, tmp2
mov \rb, #0x80000000 @ physical base address
-#ifdef CONFIG_DEBUG_LL_SER3
- add \rb, \rb, #0x00050000 @ Ser3
-#else
add \rb, \rb, #0x00010000 @ Ser1
-#endif
- .endm
-#elif defined(CONFIG_ARCH_S3C24XX)
- .macro loadsp, rb, tmp
- mov \rb, #0x50000000
- add \rb, \rb, #0x4000 * CONFIG_S3C_LOWLEVEL_UART_PORT
.endm
#else
- .macro loadsp, rb, tmp
- addruart \rb, \tmp
+ .macro loadsp, rb, tmp1, tmp2
+ addruart \rb, \tmp1, \tmp2
.endm
#endif
#endif
@@ -84,64 +88,148 @@
bl phex
.endm
- .macro debug_reloc_start
+ /*
+ * Debug kernel copy by printing the memory addresses involved
+ */
+ .macro dbgkc, begin, end, cbegin, cend
#ifdef DEBUG
- kputc #'\n'
- kphex r6, 8 /* processor id */
- kputc #':'
- kphex r7, 8 /* architecture id */
-#ifdef CONFIG_CPU_CP15
- kputc #':'
- mrc p15, 0, r0, c1, c0
- kphex r0, 8 /* control reg */
-#endif
- kputc #'\n'
- kphex r5, 8 /* decompressed kernel start */
+ kputc #'C'
+ kputc #':'
+ kputc #'0'
+ kputc #'x'
+ kphex \begin, 8 /* Start of compressed kernel */
+ kputc #'-'
+ kputc #'0'
+ kputc #'x'
+ kphex \end, 8 /* End of compressed kernel */
kputc #'-'
- kphex r9, 8 /* decompressed kernel end */
kputc #'>'
- kphex r4, 8 /* kernel execution address */
+ kputc #'0'
+ kputc #'x'
+ kphex \cbegin, 8 /* Start of kernel copy */
+ kputc #'-'
+ kputc #'0'
+ kputc #'x'
+ kphex \cend, 8 /* End of kernel copy */
kputc #'\n'
#endif
.endm
- .macro debug_reloc_end
+ /*
+ * Debug print of the final appended DTB location
+ */
+ .macro dbgadtb, begin, size
#ifdef DEBUG
- kphex r5, 8 /* end of kernel */
+ kputc #'D'
+ kputc #'T'
+ kputc #'B'
+ kputc #':'
+ kputc #'0'
+ kputc #'x'
+ kphex \begin, 8 /* Start of appended DTB */
+ kputc #' '
+ kputc #'('
+ kputc #'0'
+ kputc #'x'
+ kphex \size, 8 /* Size of appended DTB */
+ kputc #')'
kputc #'\n'
- mov r0, r4
- bl memdump /* dump 256 bytes at start of kernel */
#endif
.endm
- .section ".start", #alloc, #execinstr
+ .macro enable_cp15_barriers, reg
+ mrc p15, 0, \reg, c1, c0, 0 @ read SCTLR
+ tst \reg, #(1 << 5) @ CP15BEN bit set?
+ bne .L_\@
+ orr \reg, \reg, #(1 << 5) @ CP15 barrier instructions
+ mcr p15, 0, \reg, c1, c0, 0 @ write SCTLR
+ ARM( .inst 0xf57ff06f @ v7+ isb )
+ THUMB( isb )
+.L_\@:
+ .endm
+
+ /*
+ * The kernel build system appends the size of the
+ * decompressed kernel at the end of the compressed data
+ * in little-endian form.
+ */
+ .macro get_inflated_image_size, res:req, tmp1:req, tmp2:req
+ adr \res, .Linflated_image_size_offset
+ ldr \tmp1, [\res]
+ add \tmp1, \tmp1, \res @ address of inflated image size
+
+ ldrb \res, [\tmp1] @ get_unaligned_le32
+ ldrb \tmp2, [\tmp1, #1]
+ orr \res, \res, \tmp2, lsl #8
+ ldrb \tmp2, [\tmp1, #2]
+ ldrb \tmp1, [\tmp1, #3]
+ orr \res, \res, \tmp2, lsl #16
+ orr \res, \res, \tmp1, lsl #24
+ .endm
+
+ .macro be32tocpu, val, tmp
+#ifndef __ARMEB__
+ /* convert to little endian */
+ rev_l \val, \tmp
+#endif
+ .endm
+
+ .section ".start", "ax"
/*
* sort out different calling conventions
*/
.align
- .arm @ Always enter in ARM state
+ /*
+ * Always enter in ARM state for CPUs that support the ARM ISA.
+ * As of today (2014) that's exactly the members of the A and R
+ * classes.
+ */
+ AR_CLASS( .arm )
start:
.type start,#function
- .rept 7
- mov r0, r0
+ /*
+ * These 7 nops along with the 1 nop immediately below for
+ * !THUMB2 form 8 nops that make the compressed kernel bootable
+ * on legacy ARM systems that were assuming the kernel in a.out
+ * binary format. The boot loaders on these systems would
+ * jump 32 bytes into the image to skip the a.out header.
+ * with these 8 nops filling exactly 32 bytes, things still
+ * work as expected on these legacy systems. Thumb2 mode keeps
+ * 7 of the nops as it turns out that some boot loaders
+ * were patching the initial instructions of the kernel, i.e
+ * had started to exploit this "patch area".
+ */
+ __initial_nops
+ .rept 5
+ __nop
.endr
- ARM( mov r0, r0 )
- ARM( b 1f )
- THUMB( adr r12, BSYM(1f) )
- THUMB( bx r12 )
-
- .word 0x016f2818 @ Magic numbers to help the loader
- .word start @ absolute load/run zImage address
- .word _edata @ zImage end address
- THUMB( .thumb )
+#ifndef CONFIG_THUMB2_KERNEL
+ __nop
+#else
+ AR_CLASS( sub pc, pc, #3 ) @ A/R: switch to Thumb2 mode
+ M_CLASS( nop.w ) @ M: already in Thumb2 mode
+ .thumb
+#endif
+ W(b) 1f
+
+ .word _magic_sig @ Magic numbers to help the loader
+ .word _magic_start @ absolute load/run zImage address
+ .word _magic_end @ zImage end address
+ .word 0x04030201 @ endianness flag
+ .word 0x45454545 @ another magic number to indicate
+ .word _magic_table @ additional data table
+
+ __EFI_HEADER
1:
- mrs r9, cpsr
+ ARM_BE8( setend be ) @ go BE8 if compiled for BE8
+ AR_CLASS( mrs r9, cpsr )
#ifdef CONFIG_ARM_VIRT_EXT
bl __hyp_stub_install @ get into SVC mode, reversibly
#endif
mov r7, r1 @ save architecture ID
mov r8, r2 @ save atags pointer
+#ifndef CONFIG_CPU_V7M
/*
* Booting from Angel - need to enter SVC mode and disable
* FIQs/IRQs (numeric definitions from angel arm.h source).
@@ -157,6 +245,7 @@ not_angel:
safe_svcmode_maskall r0
msr spsr_cxsf, r9 @ Save the CPU boot mode in
@ SPSR
+#endif
/*
* Note that some cache flushing and other stuff may
* be needed here - is there an Angel SWI call for this?
@@ -170,57 +259,86 @@ not_angel:
.text
#ifdef CONFIG_AUTO_ZRELADDR
- @ determine final kernel image address
- mov r4, pc
- and r4, r4, #0xf8000000
- add r4, r4, #TEXT_OFFSET
+ /*
+ * Find the start of physical memory. As we are executing
+ * without the MMU on, we are in the physical address space.
+ * We just need to get rid of any offset by aligning the
+ * address.
+ *
+ * This alignment is a balance between the requirements of
+ * different platforms - we have chosen 128MB to allow
+ * platforms which align the start of their physical memory
+ * to 128MB to use this feature, while allowing the zImage
+ * to be placed within the first 128MB of memory on other
+ * platforms. Increasing the alignment means we place
+ * stricter alignment requirements on the start of physical
+ * memory, but relaxing it means that we break people who
+ * are already placing their zImage in (eg) the top 64MB
+ * of this range.
+ */
+ mov r0, pc
+ and r0, r0, #0xf8000000
+#ifdef CONFIG_USE_OF
+ adr r1, LC1
+#ifdef CONFIG_ARM_APPENDED_DTB
+ /*
+ * Look for an appended DTB. If found, we cannot use it to
+ * validate the calculated start of physical memory, as its
+ * memory nodes may need to be augmented by ATAGS stored at
+ * an offset from the same start of physical memory.
+ */
+ ldr r2, [r1, #4] @ get &_edata
+ add r2, r2, r1 @ relocate it
+ ldr r2, [r2] @ get DTB signature
+ ldr r3, =OF_DT_MAGIC
+ cmp r2, r3 @ do we have a DTB there?
+ beq 1f @ if yes, skip validation
+#endif /* CONFIG_ARM_APPENDED_DTB */
+
+ /*
+ * Make sure we have some stack before calling C code.
+ * No GOT fixup has occurred yet, but none of the code we're
+ * about to call uses any global variables.
+ */
+ ldr sp, [r1] @ get stack location
+ add sp, sp, r1 @ apply relocation
+
+ /* Validate calculated start against passed DTB */
+ mov r1, r8
+ bl fdt_check_mem_start
+1:
+#endif /* CONFIG_USE_OF */
+ /* Determine final kernel image address. */
+ add r4, r0, #TEXT_OFFSET
#else
ldr r4, =zreladdr
#endif
/*
* Set up a page table only if it won't overwrite ourself.
- * That means r4 < pc && r4 - 16k page directory > &_end.
+ * That means r4 < pc || r4 - 16k page directory > &_end.
* Given that r4 > &_end is most unfrequent, we add a rough
* additional 1MB of room for a possible appended DTB.
*/
mov r0, pc
cmp r0, r4
- ldrcc r0, LC0+32
+ ldrcc r0, .Lheadroom
addcc r0, r0, pc
cmpcc r4, r0
orrcc r4, r4, #1 @ remember we skipped cache_on
blcs cache_on
-restart: adr r0, LC0
- ldmia r0, {r1, r2, r3, r6, r10, r11, r12}
- ldr sp, [r0, #28]
-
- /*
- * We might be running at a different address. We need
- * to fix up various pointers.
- */
- sub r0, r0, r1 @ calculate the delta offset
- add r6, r6, r0 @ _edata
- add r10, r10, r0 @ inflated kernel size location
+restart: adr r0, LC1
+ ldr sp, [r0]
+ ldr r6, [r0, #4]
+ add sp, sp, r0
+ add r6, r6, r0
- /*
- * The kernel build system appends the size of the
- * decompressed kernel at the end of the compressed data
- * in little-endian form.
- */
- ldrb r9, [r10, #0]
- ldrb lr, [r10, #1]
- orr r9, r9, lr, lsl #8
- ldrb lr, [r10, #2]
- ldrb r10, [r10, #3]
- orr r9, r9, lr, lsl #16
- orr r9, r9, r10, lsl #24
+ get_inflated_image_size r9, r10, lr
#ifndef CONFIG_ZBOOT_ROM
/* malloc space is above the relocated stack (64k max) */
- add sp, sp, r0
- add r10, sp, #0x10000
+ add r10, sp, #MALLOC_SIZE
#else
/*
* With ZBOOT_ROM the bss/stack is non relocatable,
@@ -233,9 +351,6 @@ restart: adr r0, LC0
mov r5, #0 @ init dtb size to 0
#ifdef CONFIG_ARM_APPENDED_DTB
/*
- * r0 = delta
- * r2 = BSS start
- * r3 = BSS end
* r4 = final kernel address (possibly with LSB set)
* r5 = appended dtb size (still unknown)
* r6 = _edata
@@ -243,8 +358,6 @@ restart: adr r0, LC0
* r8 = atags/device tree pointer
* r9 = size of decompressed image
* r10 = end of this image, including bss/stack/malloc space if non XIP
- * r11 = GOT start
- * r12 = GOT end
* sp = stack pointer
*
* if there are device trees (dtb) appended to zImage, advance r10 so that the
@@ -252,11 +365,7 @@ restart: adr r0, LC0
*/
ldr lr, [r6, #0]
-#ifndef __ARMEB__
- ldr r1, =0xedfe0dd0 @ sig is 0xd00dfeed big endian
-#else
- ldr r1, =0xd00dfeed
-#endif
+ ldr r1, =OF_DT_MAGIC
cmp lr, r1
bne dtb_check_done @ not found
@@ -265,16 +374,31 @@ restart: adr r0, LC0
* OK... Let's do some funky business here.
* If we do have a DTB appended to zImage, and we do have
* an ATAG list around, we want the later to be translated
- * and folded into the former here. To be on the safe side,
- * let's temporarily move the stack away into the malloc
- * area. No GOT fixup has occurred yet, but none of the
- * code we're about to call uses any global variable.
+ * and folded into the former here. No GOT fixup has occurred
+ * yet, but none of the code we're about to call uses any
+ * global variable.
*/
- add sp, sp, #0x10000
- stmfd sp!, {r0-r3, ip, lr}
+
+ /* Get the initial DTB size */
+ ldr r5, [r6, #4]
+ be32tocpu r5, r1
+ dbgadtb r6, r5
+ /* 50% DTB growth should be good enough */
+ add r5, r5, r5, lsr #1
+ /* preserve 64-bit alignment */
+ add r5, r5, #7
+ bic r5, r5, #7
+ /* clamp to 32KB min and 1MB max */
+ cmp r5, #(1 << 15)
+ movlo r5, #(1 << 15)
+ cmp r5, #(1 << 20)
+ movhi r5, #(1 << 20)
+ /* temporarily relocate the stack past the DTB work space */
+ add sp, sp, r5
+
mov r0, r8
mov r1, r6
- sub r2, sp, r6
+ mov r2, r5
bl atags_to_fdt
/*
@@ -287,11 +411,10 @@ restart: adr r0, LC0
bic r0, r0, #1
add r0, r0, #0x100
mov r1, r6
- sub r2, sp, r6
+ mov r2, r5
bleq atags_to_fdt
- ldmfd sp!, {r0-r3, ip, lr}
- sub sp, sp, #0x10000
+ sub sp, sp, r5
#endif
mov r8, r6 @ use the appended device tree
@@ -308,15 +431,9 @@ restart: adr r0, LC0
subs r1, r5, r1
addhi r9, r9, r1
- /* Get the dtb's size */
+ /* Get the current DTB size */
ldr r5, [r6, #4]
-#ifndef __ARMEB__
- /* convert r5 (dtb size) to little endian */
- eor r1, r5, r5, ror #16
- bic r1, r1, #0x00ff0000
- mov r5, r5, ror #8
- eor r5, r5, r1, lsr #8
-#endif
+ be32tocpu r5, r1
/* preserve 64-bit alignment */
add r5, r5, #7
@@ -373,7 +490,12 @@ dtb_check_done:
cmp r0, #HYP_MODE
bne 1f
- bl __hyp_get_vectors
+ /*
+ * Compute the address of the hyp vectors after relocation.
+ * Call __hyp_set_vectors with the new address so that we
+ * can HVC again after the copy.
+ */
+ adr_l r0, __hyp_stub_vectors
sub r0, r0, r5
add r0, r0, r10
bl __hyp_set_vectors
@@ -386,6 +508,20 @@ dtb_check_done:
add r6, r9, r5
add r9, r9, r10
+#ifdef DEBUG
+ sub r10, r6, r5
+ sub r10, r9, r10
+ /*
+ * We are about to copy the kernel to a new memory area.
+ * The boundaries of the new memory area can be found in
+ * r10 and r9, whilst r5 and r6 contain the boundaries
+ * of the memory we are going to copy.
+ * Calling dbgkc will help with the printing of this
+ * information.
+ */
+ dbgkc r5, r6, r10, r9
+#endif
+
1: ldmdb r6!, {r0 - r3, r10 - r12, lr}
cmp r6, r5
stmdb r9!, {r0 - r3, r10 - r12, lr}
@@ -394,19 +530,19 @@ dtb_check_done:
/* Preserve offset to relocated code. */
sub r6, r9, r6
-#ifndef CONFIG_ZBOOT_ROM
- /* cache_clean_flush may use the stack, so relocate it */
- add sp, sp, r6
-#endif
-
- tst r4, #1
- bleq cache_clean_flush
+ mov r0, r9 @ start of relocated zImage
+ add r1, sp, r6 @ end of relocated zImage
+ bl cache_clean_flush
- adr r0, BSYM(restart)
+ badr r0, restart
add r0, r0, r6
mov pc, r0
wont_overwrite:
+ adr r0, LC0
+ ldmia r0, {r1, r2, r3, r11, r12}
+ sub r0, r0, r1 @ calculate the delta offset
+
/*
* If delta is zero, we are running at the address we were linked at.
* r0 = delta
@@ -493,13 +629,16 @@ not_relocated: mov r0, #0
*/
mov r0, r4
mov r1, sp @ malloc space above stack
- add r2, sp, #0x10000 @ 64k max
+ add r2, sp, #MALLOC_SIZE @ 64k max
mov r3, r7
bl decompress_kernel
+
+ get_inflated_image_size r1, r2, r3
+
+ mov r0, r4 @ start of inflated image
+ add r1, r1, r0 @ end of inflated image
bl cache_clean_flush
bl cache_off
- mov r1, r7 @ restore architecture number
- mov r2, r8 @ restore atags pointer
#ifdef CONFIG_ARM_VIRT_EXT
mrs r0, spsr @ Get saved CPU boot mode
@@ -507,17 +646,11 @@ not_relocated: mov r0, #0
cmp r0, #HYP_MODE @ if not booted in HYP mode...
bne __enter_kernel @ boot kernel directly
- adr r12, .L__hyp_reentry_vectors_offset
- ldr r0, [r12]
- add r0, r0, r12
-
+ adr_l r0, __hyp_reentry_vectors
bl __hyp_set_vectors
__HVC(0) @ otherwise bounce to hyp mode
b . @ should never be reached
-
- .align 2
-.L__hyp_reentry_vectors_offset: .long __hyp_reentry_vectors - .
#else
b __enter_kernel
#endif
@@ -527,14 +660,21 @@ not_relocated: mov r0, #0
LC0: .word LC0 @ r1
.word __bss_start @ r2
.word _end @ r3
- .word _edata @ r6
- .word input_data_end - 4 @ r10 (inflated size location)
.word _got_start @ r11
.word _got_end @ ip
- .word .L_user_stack_end @ sp
- .word _end - restart + 16384 + 1024*1024
.size LC0, . - LC0
+ .type LC1, #object
+LC1: .word .L_user_stack_end - LC1 @ sp
+ .word _edata - LC1 @ r6
+ .size LC1, . - LC1
+
+.Lheadroom:
+ .word _end - restart + 16384 + 1024*1024
+
+.Linflated_image_size_offset:
+ .long (input_data_end - 4) - .
+
#ifdef CONFIG_ARCH_RPC
.globl params
params: ldr r0, =0x10000100 @ params_phys for RPC
@@ -544,6 +684,24 @@ params: ldr r0, =0x10000100 @ params_phys for RPC
#endif
/*
+ * dcache_line_size - get the minimum D-cache line size from the CTR register
+ * on ARMv7.
+ */
+ .macro dcache_line_size, reg, tmp
+#ifdef CONFIG_CPU_V7M
+ movw \tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
+ movt \tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
+ ldr \tmp, [\tmp]
+#else
+ mrc p15, 0, \tmp, c0, c0, 1 @ read ctr
+#endif
+ lsr \tmp, \tmp, #16
+ and \tmp, \tmp, #0xf @ cache line size encoding
+ mov \reg, #4 @ bytes per word
+ mov \reg, \reg, lsl \tmp @ actual cache line size
+ .endm
+
+/*
* Turn on the cache. We need to setup some page tables so that we
* can have both the I and D caches on.
*
@@ -699,9 +857,7 @@ __armv4_mmu_cache_on:
mrc p15, 0, r0, c1, c0, 0 @ read control reg
orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement
orr r0, r0, #0x0030
-#ifdef CONFIG_CPU_ENDIAN_BE8
- orr r0, r0, #1 << 25 @ big-endian page tables
-#endif
+ ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables
bl __common_mmu_cache_on
mov r0, #0
mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs
@@ -709,6 +865,7 @@ __armv4_mmu_cache_on:
mov pc, r12
__armv7_mmu_cache_on:
+ enable_cp15_barriers r11
mov r12, lr
#ifdef CONFIG_MMU
mrc p15, 0, r11, c0, c1, 4 @ read ID_MMFR0
@@ -728,14 +885,12 @@ __armv7_mmu_cache_on:
orr r0, r0, #1 << 22 @ U (v6 unaligned access model)
@ (needed for ARM1176)
#ifdef CONFIG_MMU
-#ifdef CONFIG_CPU_ENDIAN_BE8
- orr r0, r0, #1 << 25 @ big-endian page tables
-#endif
+ ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables
mrcne p15, 0, r6, c2, c0, 2 @ read ttb control reg
orrne r0, r0, #1 @ MMU enabled
movne r1, #0xfffffffd @ domain 0 = client
bic r6, r6, #1 << 31 @ 32-bit translation system
- bic r6, r6, #3 << 0 @ use only ttbr0
+ bic r6, r6, #(7 << 0) | (1 << 4) @ use only ttbr0
mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer
mcrne p15, 0, r1, c3, c0, 0 @ load domain access control
mcrne p15, 0, r6, c2, c0, 2 @ load ttb control
@@ -796,6 +951,16 @@ __common_mmu_cache_on:
call_cache_fn: adr r12, proc_types
#ifdef CONFIG_CPU_CP15
mrc p15, 0, r9, c0, c0 @ get processor ID
+#elif defined(CONFIG_CPU_V7M)
+ /*
+ * On v7-M the processor id is located in the V7M_SCB_CPUID
+ * register, but as cache handling is IMPLEMENTATION DEFINED on
+ * v7-M (if existant at all) we just return early here.
+ * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
+ * __armv7_mmu_cache_{on,off,flush}) would be selected which
+ * use cp15 registers that are not implemented on v7-M.
+ */
+ bx lr
#else
ldr r9, =CONFIG_PROCESSOR_ID
#endif
@@ -1023,13 +1188,11 @@ __armv4_mmu_cache_off:
__armv7_mmu_cache_off:
mrc p15, 0, r0, c1, c0
#ifdef CONFIG_MMU
- bic r0, r0, #0x000d
+ bic r0, r0, #0x0005
#else
- bic r0, r0, #0x000c
+ bic r0, r0, #0x0004
#endif
mcr p15, 0, r0, c1, c0 @ turn MMU and cache off
- mov r12, lr
- bl __armv7_mmu_cache_flush
mov r0, #0
#ifdef CONFIG_MMU
mcr p15, 0, r0, c8, c7, 0 @ invalidate whole TLB
@@ -1037,11 +1200,14 @@ __armv7_mmu_cache_off:
mcr p15, 0, r0, c7, c5, 6 @ invalidate BTC
mcr p15, 0, r0, c7, c10, 4 @ DSB
mcr p15, 0, r0, c7, c5, 4 @ ISB
- mov pc, r12
+ mov pc, lr
/*
* Clean and flush the cache to maintain consistency.
*
+ * On entry,
+ * r0 = start address
+ * r1 = end address (exclusive)
* On exit,
* r1, r2, r3, r9, r10, r11, r12 corrupted
* This routine must preserve:
@@ -1050,9 +1216,12 @@ __armv7_mmu_cache_off:
.align 5
cache_clean_flush:
mov r3, #16
+ mov r11, r1
b call_cache_fn
__armv4_mpu_cache_flush:
+ tst r4, #1
+ movne pc, lr
mov r2, #1
mov r3, #0
mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache
@@ -1070,6 +1239,8 @@ __armv4_mpu_cache_flush:
mov pc, lr
__fa526_cache_flush:
+ tst r4, #1
+ movne pc, lr
mov r1, #0
mcr p15, 0, r1, c7, c14, 0 @ clean and invalidate D cache
mcr p15, 0, r1, c7, c5, 0 @ flush I cache
@@ -1078,13 +1249,17 @@ __fa526_cache_flush:
__armv6_mmu_cache_flush:
mov r1, #0
- mcr p15, 0, r1, c7, c14, 0 @ clean+invalidate D
+ tst r4, #1
+ mcreq p15, 0, r1, c7, c14, 0 @ clean+invalidate D
mcr p15, 0, r1, c7, c5, 0 @ invalidate I+BTB
- mcr p15, 0, r1, c7, c15, 0 @ clean+invalidate unified
+ mcreq p15, 0, r1, c7, c15, 0 @ clean+invalidate unified
mcr p15, 0, r1, c7, c10, 4 @ drain WB
mov pc, lr
__armv7_mmu_cache_flush:
+ enable_cp15_barriers r10
+ tst r4, #1
+ bne iflush
mrc p15, 0, r10, c0, c1, 5 @ read ID_MMFR1
tst r10, #0xf << 16 @ hierarchical cache (ARMv7)
mov r10, #0
@@ -1092,51 +1267,16 @@ __armv7_mmu_cache_flush:
mcr p15, 0, r10, c7, c14, 0 @ clean+invalidate D
b iflush
hierarchical:
- mcr p15, 0, r10, c7, c10, 5 @ DMB
- stmfd sp!, {r0-r7, r9-r11}
- mrc p15, 1, r0, c0, c0, 1 @ read clidr
- ands r3, r0, #0x7000000 @ extract loc from clidr
- mov r3, r3, lsr #23 @ left align loc bit field
- beq finished @ if loc is 0, then no need to clean
- mov r10, #0 @ start clean at cache level 0
-loop1:
- add r2, r10, r10, lsr #1 @ work out 3x current cache level
- mov r1, r0, lsr r2 @ extract cache type bits from clidr
- and r1, r1, #7 @ mask of the bits for current cache only
- cmp r1, #2 @ see what cache we have at this level
- blt skip @ skip if no cache, or just i-cache
- mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
- mcr p15, 0, r10, c7, c5, 4 @ isb to sych the new cssr&csidr
- mrc p15, 1, r1, c0, c0, 0 @ read the new csidr
- and r2, r1, #7 @ extract the length of the cache lines
- add r2, r2, #4 @ add 4 (line length offset)
- ldr r4, =0x3ff
- ands r4, r4, r1, lsr #3 @ find maximum number on the way size
- clz r5, r4 @ find bit position of way size increment
- ldr r7, =0x7fff
- ands r7, r7, r1, lsr #13 @ extract max number of the index size
-loop2:
- mov r9, r4 @ create working copy of max way size
-loop3:
- ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11
- ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11
- THUMB( lsl r6, r9, r5 )
- THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11
- THUMB( lsl r6, r7, r2 )
- THUMB( orr r11, r11, r6 ) @ factor index number into r11
- mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
- subs r9, r9, #1 @ decrement the way
- bge loop3
- subs r7, r7, #1 @ decrement the index
- bge loop2
-skip:
- add r10, r10, #2 @ increment cache number
- cmp r3, r10
- bgt loop1
-finished:
- ldmfd sp!, {r0-r7, r9-r11}
- mov r10, #0 @ swith back to cache level 0
- mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
+ dcache_line_size r1, r2 @ r1 := dcache min line size
+ sub r2, r1, #1 @ r2 := line size mask
+ bic r0, r0, r2 @ round down start to line size
+ sub r11, r11, #1 @ end address is exclusive
+ bic r11, r11, r2 @ round down end to line size
+0: cmp r0, r11 @ finished?
+ bgt iflush
+ mcr p15, 0, r0, c7, c14, 1 @ Dcache clean/invalidate by VA
+ add r0, r0, r1
+ b 0b
iflush:
mcr p15, 0, r10, c7, c10, 4 @ DSB
mcr p15, 0, r10, c7, c5, 0 @ invalidate I+BTB
@@ -1145,13 +1285,17 @@ iflush:
mov pc, lr
__armv5tej_mmu_cache_flush:
-1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate D cache
+ tst r4, #1
+ movne pc, lr
+1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate D cache
bne 1b
mcr p15, 0, r0, c7, c5, 0 @ flush I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
__armv4_mmu_cache_flush:
+ tst r4, #1
+ movne pc, lr
mov r2, #64*1024 @ default: 32K dcache size (*2)
mov r11, #32 @ default: 32 byte line size
mrc p15, 0, r3, c0, c0, 1 @ read cache type
@@ -1185,6 +1329,8 @@ no_cache_id:
__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
+ tst r4, #1
+ movne pc, lr
mov r1, #0
mcr p15, 0, r1, c7, c0, 0 @ invalidate whole cache v3
mov pc, lr
@@ -1215,11 +1361,11 @@ phex: adr r3, phexbuf
b 1b
@ puts corrupts {r0, r1, r2, r3}
-puts: loadsp r3, r1
+puts: loadsp r3, r2, r1
1: ldrb r2, [r0], #1
teq r2, #0
moveq pc, lr
-2: writeb r2, r3
+2: writeb r2, r3, r1
mov r1, #0x00020000
3: subs r1, r1, #1
bne 3b
@@ -1232,8 +1378,8 @@ puts: loadsp r3, r1
@ putc corrupts {r0, r1, r2, r3}
putc:
mov r2, r0
+ loadsp r3, r1, r0
mov r0, #0
- loadsp r3, r1
b 2b
@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
@@ -1273,7 +1419,11 @@ memdump: mov r12, r0
__hyp_reentry_vectors:
W(b) . @ reset
W(b) . @ undef
+#ifdef CONFIG_EFI_STUB
+ W(b) __enter_kernel_from_hyp @ hvc from HYP
+#else
W(b) . @ svc
+#endif
W(b) . @ pabort
W(b) . @ dabort
W(b) __enter_kernel @ hyp
@@ -1283,11 +1433,98 @@ __hyp_reentry_vectors:
__enter_kernel:
mov r0, #0 @ must be 0
- ARM( mov pc, r4 ) @ call kernel
- THUMB( bx r4 ) @ entry point is always ARM
+ mov r1, r7 @ restore architecture number
+ mov r2, r8 @ restore atags pointer
+ ARM( mov pc, r4 ) @ call kernel
+ M_CLASS( add r4, r4, #1 ) @ enter in Thumb mode for M class
+ THUMB( bx r4 ) @ entry point is always ARM for A/R classes
reloc_code_end:
+#ifdef CONFIG_EFI_STUB
+__enter_kernel_from_hyp:
+ mrc p15, 4, r0, c1, c0, 0 @ read HSCTLR
+ bic r0, r0, #0x5 @ disable MMU and caches
+ mcr p15, 4, r0, c1, c0, 0 @ write HSCTLR
+ isb
+ b __enter_kernel
+
+ENTRY(efi_enter_kernel)
+ mov r4, r0 @ preserve image base
+ mov r8, r1 @ preserve DT pointer
+
+ adr_l r0, call_cache_fn
+ adr r1, 0f @ clean the region of code we
+ bl cache_clean_flush @ may run with the MMU off
+
+#ifdef CONFIG_ARM_VIRT_EXT
+ @
+ @ The EFI spec does not support booting on ARM in HYP mode,
+ @ since it mandates that the MMU and caches are on, with all
+ @ 32-bit addressable DRAM mapped 1:1 using short descriptors.
+ @
+ @ While the EDK2 reference implementation adheres to this,
+ @ U-Boot might decide to enter the EFI stub in HYP mode
+ @ anyway, with the MMU and caches either on or off.
+ @
+ mrs r0, cpsr @ get the current mode
+ msr spsr_cxsf, r0 @ record boot mode
+ and r0, r0, #MODE_MASK @ are we running in HYP mode?
+ cmp r0, #HYP_MODE
+ bne .Lefi_svc
+
+ mrc p15, 4, r1, c1, c0, 0 @ read HSCTLR
+ tst r1, #0x1 @ MMU enabled at HYP?
+ beq 1f
+
+ @
+ @ When running in HYP mode with the caches on, we're better
+ @ off just carrying on using the cached 1:1 mapping that the
+ @ firmware provided. Set up the HYP vectors so HVC instructions
+ @ issued from HYP mode take us to the correct handler code. We
+ @ will disable the MMU before jumping to the kernel proper.
+ @
+ ARM( bic r1, r1, #(1 << 30) ) @ clear HSCTLR.TE
+ THUMB( orr r1, r1, #(1 << 30) ) @ set HSCTLR.TE
+ mcr p15, 4, r1, c1, c0, 0
+ adr r0, __hyp_reentry_vectors
+ mcr p15, 4, r0, c12, c0, 0 @ set HYP vector base (HVBAR)
+ isb
+ b .Lefi_hyp
+
+ @
+ @ When running in HYP mode with the caches off, we need to drop
+ @ into SVC mode now, and let the decompressor set up its cached
+ @ 1:1 mapping as usual.
+ @
+1: mov r9, r4 @ preserve image base
+ bl __hyp_stub_install @ install HYP stub vectors
+ safe_svcmode_maskall r1 @ drop to SVC mode
+ msr spsr_cxsf, r0 @ record boot mode
+ orr r4, r9, #1 @ restore image base and set LSB
+ b .Lefi_hyp
+.Lefi_svc:
+#endif
+ mrc p15, 0, r0, c1, c0, 0 @ read SCTLR
+ tst r0, #0x1 @ MMU enabled?
+ orreq r4, r4, #1 @ set LSB if not
+
+.Lefi_hyp:
+ mov r0, r8 @ DT start
+ add r1, r8, r2 @ DT end
+ bl cache_clean_flush
+
+ adr r0, 0f @ switch to our stack
+ ldr sp, [r0]
+ add sp, sp, r0
+
+ mov r5, #0 @ appended DTB size
+ mov r7, #0xFFFFFFFF @ machine ID
+ b wont_overwrite
+ENDPROC(efi_enter_kernel)
+0: .long .L_user_stack_end - .
+#endif
+
.align
.section ".stack", "aw", %nobits
.L_user_stack: .space 4096