author    Russell King <rmk+kernel@armlinux.org.uk>  2020-11-09 10:19:25 +0000
committer Russell King <rmk+kernel@armlinux.org.uk>  2020-11-09 10:19:25 +0000
commit    ae73ad0527075a39122e5374504e1137c2e2d14f (patch)
tree      9f2e11d7eabc4edb61c76f5d1ed1453c864b43a0
parent    3650b228f83adda7e5ee532e2b90429c03f7b9ec (diff)
parent    9443076e4330a14ae2c6114307668b98a8293b77 (diff)
Merge tag 'arm-p2v-for-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux into devel-stable
Implement the necessary changes in the ARM assembler boot code to permit the relative alignment of the physical and the virtual addresses of the kernel to be as little as 2 MiB, as opposed to the minimum of 16 MiB we support today.

The series was posted here and reviewed by Nicolas Pitre and Linus Walleij:
https://lore.kernel.org/linux-arm-kernel/20200921154117.757-1-ardb@kernel.org/
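For context, CONFIG_ARM_PATCH_PHYS_VIRT makes the kernel translate between virtual and physical addresses by applying a delta that is only discovered at boot; rather than loading that delta from a variable on every translation, the boot code patches it directly into the immediates of the translating instructions. A minimal C model of the computation being patched in (illustrative names, not kernel API):

        #include <stdint.h>

        /* PHYS_OFFSET - PAGE_OFFSET, discovered at boot by __fixup_pv_table */
        static uint64_t pv_offset;

        static inline uint64_t model_virt_to_phys(unsigned long va)
        {
                /* the real kernel patches immediates instead of loading a global */
                return (uint64_t)va + pv_offset;
        }

        static inline unsigned long model_phys_to_virt(uint64_t pa)
        {
                return (unsigned long)(pa - pv_offset);
        }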
-rw-r--r--  arch/arm/Kconfig                  |   2
-rw-r--r--  arch/arm/include/asm/assembler.h  |  84
-rw-r--r--  arch/arm/include/asm/elf.h        |   5
-rw-r--r--  arch/arm/include/asm/memory.h     |  57
-rw-r--r--  arch/arm/kernel/Makefile          |   1
-rw-r--r--  arch/arm/kernel/head.S            | 142
-rw-r--r--  arch/arm/kernel/module.c          |  20
-rw-r--r--  arch/arm/kernel/phys2virt.S       | 238
8 files changed, 387 insertions, 162 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index fe2f17eb2b50..d8d62b8e72e4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -243,7 +243,7 @@ config ARM_PATCH_PHYS_VIRT
kernel in system memory.
This can only be used with non-XIP MMU kernels where the base
- of physical memory is at a 16MB boundary.
+ of physical memory is at a 2 MiB boundary.
Only disable this option if you know that you do not require
this feature (eg, building a kernel for a single machine) and
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index feac2c8b86f2..72627c5fb3b2 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -494,4 +494,88 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
#define _ASM_NOKPROBE(entry)
#endif
+ .macro __adldst_l, op, reg, sym, tmp, c
+ .if __LINUX_ARM_ARCH__ < 7
+ ldr\c \tmp, .La\@
+ .subsection 1
+ .align 2
+.La\@: .long \sym - .Lpc\@
+ .previous
+ .else
+ .ifnb \c
+ THUMB( ittt \c )
+ .endif
+ movw\c \tmp, #:lower16:\sym - .Lpc\@
+ movt\c \tmp, #:upper16:\sym - .Lpc\@
+ .endif
+
+#ifndef CONFIG_THUMB2_KERNEL
+ .set .Lpc\@, . + 8 // PC bias
+ .ifc \op, add
+ add\c \reg, \tmp, pc
+ .else
+ \op\c \reg, [pc, \tmp]
+ .endif
+#else
+.Lb\@: add\c \tmp, \tmp, pc
+ /*
+ * In Thumb-2 builds, the PC bias depends on whether we are currently
+ * emitting into a .arm or a .thumb section. The size of the add opcode
+ * above will be 2 bytes when emitting in Thumb mode and 4 bytes when
+ * emitting in ARM mode, so let's use this to account for the bias.
+ */
+ .set .Lpc\@, . + (. - .Lb\@)
+
+ .ifnc \op, add
+ \op\c \reg, [\tmp]
+ .endif
+#endif
+ .endm
+
+ /*
+ * mov_l - move a constant value or [relocated] address into a register
+ */
+ .macro mov_l, dst:req, imm:req
+ .if __LINUX_ARM_ARCH__ < 7
+ ldr \dst, =\imm
+ .else
+ movw \dst, #:lower16:\imm
+ movt \dst, #:upper16:\imm
+ .endif
+ .endm
+
+ /*
+ * adr_l - adr pseudo-op with unlimited range
+ *
+ * @dst: destination register
+ * @sym: name of the symbol
+ * @cond: conditional opcode suffix
+ */
+ .macro adr_l, dst:req, sym:req, cond
+ __adldst_l add, \dst, \sym, \dst, \cond
+ .endm
+
+ /*
+ * ldr_l - ldr <literal> pseudo-op with unlimited range
+ *
+ * @dst: destination register
+ * @sym: name of the symbol
+ * @cond: conditional opcode suffix
+ */
+ .macro ldr_l, dst:req, sym:req, cond
+ __adldst_l ldr, \dst, \sym, \dst, \cond
+ .endm
+
+ /*
+ * str_l - str <literal> pseudo-op with unlimited range
+ *
+ * @src: source register
+ * @sym: name of the symbol
+ * @tmp: mandatory scratch register
+ * @cond: conditional opcode suffix
+ */
+ .macro str_l, src:req, sym:req, tmp:req, cond
+ __adldst_l str, \src, \sym, \tmp, \cond
+ .endm
+
#endif /* __ASM_ASSEMBLER_H__ */
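On v7 and later, the mov_l/adr_l/ldr_l/str_l helpers added above build a full 32-bit value from a MOVW/MOVT pair (adr_l then adds it to the PC, accounting for the 8-byte ARM PC bias). A hedged C illustration of the #:lower16:/#:upper16: split those relocations perform (function name is made up):

        #include <stdint.h>

        static void movw_movt_split(uint32_t x, uint16_t *lo, uint16_t *hi)
        {
                *lo = x & 0xffff;       /* #:lower16:x, loaded by MOVW */
                *hi = x >> 16;          /* #:upper16:x, inserted by MOVT */
        }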
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index b078d992414b..0ac62a54b73c 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -51,6 +51,7 @@ typedef struct user_fp elf_fpregset_t;
#define R_ARM_NONE 0
#define R_ARM_PC24 1
#define R_ARM_ABS32 2
+#define R_ARM_REL32 3
#define R_ARM_CALL 28
#define R_ARM_JUMP24 29
#define R_ARM_TARGET1 38
@@ -58,11 +59,15 @@ typedef struct user_fp elf_fpregset_t;
#define R_ARM_PREL31 42
#define R_ARM_MOVW_ABS_NC 43
#define R_ARM_MOVT_ABS 44
+#define R_ARM_MOVW_PREL_NC 45
+#define R_ARM_MOVT_PREL 46
#define R_ARM_THM_CALL 10
#define R_ARM_THM_JUMP24 30
#define R_ARM_THM_MOVW_ABS_NC 47
#define R_ARM_THM_MOVT_ABS 48
+#define R_ARM_THM_MOVW_PREL_NC 49
+#define R_ARM_THM_MOVT_PREL 50
/*
* These are used to set parameters in the core dumps.
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 99035b5891ef..2611be35f26b 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -173,6 +173,7 @@ extern unsigned long vectors_base;
* so that all we need to do is modify the 8-bit constant field.
*/
#define __PV_BITS_31_24 0x81000000
+#define __PV_BITS_23_16 0x810000
#define __PV_BITS_7_0 0x81
extern unsigned long __pv_phys_pfn_offset;
@@ -183,43 +184,65 @@ extern const void *__pv_table_begin, *__pv_table_end;
#define PHYS_OFFSET ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
#define PHYS_PFN_OFFSET (__pv_phys_pfn_offset)
-#define __pv_stub(from,to,instr,type) \
+#ifndef CONFIG_THUMB2_KERNEL
+#define __pv_stub(from,to,instr) \
__asm__("@ __pv_stub\n" \
"1: " instr " %0, %1, %2\n" \
+ "2: " instr " %0, %0, %3\n" \
" .pushsection .pv_table,\"a\"\n" \
- " .long 1b\n" \
+ " .long 1b - ., 2b - .\n" \
" .popsection\n" \
: "=r" (to) \
- : "r" (from), "I" (type))
+ : "r" (from), "I" (__PV_BITS_31_24), \
+ "I"(__PV_BITS_23_16))
+
+#define __pv_add_carry_stub(x, y) \
+ __asm__("@ __pv_add_carry_stub\n" \
+ "0: movw %R0, #0\n" \
+ " adds %Q0, %1, %R0, lsl #20\n" \
+ "1: mov %R0, %2\n" \
+ " adc %R0, %R0, #0\n" \
+ " .pushsection .pv_table,\"a\"\n" \
+ " .long 0b - ., 1b - .\n" \
+ " .popsection\n" \
+ : "=&r" (y) \
+ : "r" (x), "I" (__PV_BITS_7_0) \
+ : "cc")
-#define __pv_stub_mov_hi(t) \
- __asm__ volatile("@ __pv_stub_mov\n" \
- "1: mov %R0, %1\n" \
+#else
+#define __pv_stub(from,to,instr) \
+ __asm__("@ __pv_stub\n" \
+ "0: movw %0, #0\n" \
+ " lsl %0, #21\n" \
+ " " instr " %0, %1, %0\n" \
" .pushsection .pv_table,\"a\"\n" \
- " .long 1b\n" \
+ " .long 0b - .\n" \
" .popsection\n" \
- : "=r" (t) \
- : "I" (__PV_BITS_7_0))
+ : "=&r" (to) \
+ : "r" (from))
#define __pv_add_carry_stub(x, y) \
- __asm__ volatile("@ __pv_add_carry_stub\n" \
- "1: adds %Q0, %1, %2\n" \
+ __asm__("@ __pv_add_carry_stub\n" \
+ "0: movw %R0, #0\n" \
+ " lsls %R0, #21\n" \
+ " adds %Q0, %1, %R0\n" \
+ "1: mvn %R0, #0\n" \
" adc %R0, %R0, #0\n" \
" .pushsection .pv_table,\"a\"\n" \
- " .long 1b\n" \
+ " .long 0b - ., 1b - .\n" \
" .popsection\n" \
- : "+r" (y) \
- : "r" (x), "I" (__PV_BITS_31_24) \
+ : "=&r" (y) \
+ : "r" (x) \
: "cc")
+#endif
static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
{
phys_addr_t t;
if (sizeof(phys_addr_t) == 4) {
- __pv_stub(x, t, "add", __PV_BITS_31_24);
+ __pv_stub(x, t, "add");
} else {
- __pv_stub_mov_hi(t);
__pv_add_carry_stub(x, t);
}
return t;
@@ -235,7 +258,7 @@ static inline unsigned long __phys_to_virt(phys_addr_t x)
* assembler expression receives 32 bit argument
* in place where 'r' 32 bit operand is expected.
*/
- __pv_stub((unsigned long) x, t, "sub", __PV_BITS_31_24);
+ __pv_stub((unsigned long) x, t, "sub");
return t;
}
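In the ARM non-LPAE case, __pv_stub now emits two add/sub instructions so that bits 31:24 and bits 23:16 of the offset can each be patched as an 8-bit rotated immediate. A C model of the patched "add" pair (sketch only; in the kernel the offset lives in the patched opcodes, not in a parameter):

        #include <stdint.h>

        static uint32_t pv_stub_add_model(uint32_t from, uint32_t offset)
        {
                /* 1:  add to, from, #offset<31:24>, lsl #24 */
                uint32_t to = from + (offset & 0xff000000);
                /* 2:  add to, to, #offset<23:16>, lsl #16 */
                to += offset & 0x00ff0000;
                /* bits 20:0 of the offset are zero: 2 MiB alignment */
                return to;
        }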
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 89e5d864e923..9e465efcc8b6 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_PARAVIRT) += paravirt.o
head-y := head$(MMUEXT).o
obj-$(CONFIG_DEBUG_LL) += debug.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_ARM_PATCH_PHYS_VIRT) += phys2virt.o
# This is executed very early using a temporary stack when no memory allocator
# nor global data is available. Everything has to be allocated on the stack.
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index f8904227e7fd..7e3f36809011 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -586,146 +586,4 @@ ENTRY(fixup_smp)
ldmfd sp!, {r4 - r6, pc}
ENDPROC(fixup_smp)
-#ifdef __ARMEB__
-#define LOW_OFFSET 0x4
-#define HIGH_OFFSET 0x0
-#else
-#define LOW_OFFSET 0x0
-#define HIGH_OFFSET 0x4
-#endif
-
-#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
-
-/* __fixup_pv_table - patch the stub instructions with the delta between
- * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and
- * can be expressed by an immediate shifter operand. The stub instruction
- * has a form of '(add|sub) rd, rn, #imm'.
- */
- __HEAD
-__fixup_pv_table:
- adr r0, 1f
- ldmia r0, {r3-r7}
- mvn ip, #0
- subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET
- add r4, r4, r3 @ adjust table start address
- add r5, r5, r3 @ adjust table end address
- add r6, r6, r3 @ adjust __pv_phys_pfn_offset address
- add r7, r7, r3 @ adjust __pv_offset address
- mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN
- str r0, [r6] @ save computed PHYS_OFFSET to __pv_phys_pfn_offset
- strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits
- mov r6, r3, lsr #24 @ constant for add/sub instructions
- teq r3, r6, lsl #24 @ must be 16MiB aligned
-THUMB( it ne @ cross section branch )
- bne __error
- str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits
- b __fixup_a_pv_table
-ENDPROC(__fixup_pv_table)
-
- .align
-1: .long .
- .long __pv_table_begin
- .long __pv_table_end
-2: .long __pv_phys_pfn_offset
- .long __pv_offset
-
- .text
-__fixup_a_pv_table:
- adr r0, 3f
- ldr r6, [r0]
- add r6, r6, r3
- ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word
- ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word
- mov r6, r6, lsr #24
- cmn r0, #1
-#ifdef CONFIG_THUMB2_KERNEL
- moveq r0, #0x200000 @ set bit 21, mov to mvn instruction
- lsls r6, #24
- beq 2f
- clz r7, r6
- lsr r6, #24
- lsl r6, r7
- bic r6, #0x0080
- lsrs r7, #1
- orrcs r6, #0x0080
- orr r6, r6, r7, lsl #12
- orr r6, #0x4000
- b 2f
-1: add r7, r3
- ldrh ip, [r7, #2]
-ARM_BE8(rev16 ip, ip)
- tst ip, #0x4000
- and ip, #0x8f00
- orrne ip, r6 @ mask in offset bits 31-24
- orreq ip, r0 @ mask in offset bits 7-0
-ARM_BE8(rev16 ip, ip)
- strh ip, [r7, #2]
- bne 2f
- ldrh ip, [r7]
-ARM_BE8(rev16 ip, ip)
- bic ip, #0x20
- orr ip, ip, r0, lsr #16
-ARM_BE8(rev16 ip, ip)
- strh ip, [r7]
-2: cmp r4, r5
- ldrcc r7, [r4], #4 @ use branch for delay slot
- bcc 1b
- bx lr
-#else
-#ifdef CONFIG_CPU_ENDIAN_BE8
- moveq r0, #0x00004000 @ set bit 22, mov to mvn instruction
-#else
- moveq r0, #0x400000 @ set bit 22, mov to mvn instruction
-#endif
- b 2f
-1: ldr ip, [r7, r3]
-#ifdef CONFIG_CPU_ENDIAN_BE8
- @ in BE8, we load data in BE, but instructions still in LE
- bic ip, ip, #0xff000000
- tst ip, #0x000f0000 @ check the rotation field
- orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24
- biceq ip, ip, #0x00004000 @ clear bit 22
- orreq ip, ip, r0 @ mask in offset bits 7-0
-#else
- bic ip, ip, #0x000000ff
- tst ip, #0xf00 @ check the rotation field
- orrne ip, ip, r6 @ mask in offset bits 31-24
- biceq ip, ip, #0x400000 @ clear bit 22
- orreq ip, ip, r0 @ mask in offset bits 7-0
-#endif
- str ip, [r7, r3]
-2: cmp r4, r5
- ldrcc r7, [r4], #4 @ use branch for delay slot
- bcc 1b
- ret lr
-#endif
-ENDPROC(__fixup_a_pv_table)
-
- .align
-3: .long __pv_offset
-
-ENTRY(fixup_pv_table)
- stmfd sp!, {r4 - r7, lr}
- mov r3, #0 @ no offset
- mov r4, r0 @ r0 = table start
- add r5, r0, r1 @ r1 = table size
- bl __fixup_a_pv_table
- ldmfd sp!, {r4 - r7, pc}
-ENDPROC(fixup_pv_table)
-
- .data
- .align 2
- .globl __pv_phys_pfn_offset
- .type __pv_phys_pfn_offset, %object
-__pv_phys_pfn_offset:
- .word 0
- .size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset
-
- .globl __pv_offset
- .type __pv_offset, %object
-__pv_offset:
- .quad 0
- .size __pv_offset, . -__pv_offset
-#endif
-
#include "head-common.S"
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index e15444b25ca0..beac45e89ba6 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -185,14 +185,24 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
*(u32 *)loc |= offset & 0x7fffffff;
break;
+ case R_ARM_REL32:
+ *(u32 *)loc += sym->st_value - loc;
+ break;
+
case R_ARM_MOVW_ABS_NC:
case R_ARM_MOVT_ABS:
+ case R_ARM_MOVW_PREL_NC:
+ case R_ARM_MOVT_PREL:
offset = tmp = __mem_to_opcode_arm(*(u32 *)loc);
offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff);
offset = (offset ^ 0x8000) - 0x8000;
offset += sym->st_value;
- if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS)
+ if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL ||
+ ELF32_R_TYPE(rel->r_info) == R_ARM_MOVW_PREL_NC)
+ offset -= loc;
+ if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS ||
+ ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL)
offset >>= 16;
tmp &= 0xfff0f000;
@@ -283,6 +293,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
case R_ARM_THM_MOVW_ABS_NC:
case R_ARM_THM_MOVT_ABS:
+ case R_ARM_THM_MOVW_PREL_NC:
+ case R_ARM_THM_MOVT_PREL:
upper = __mem_to_opcode_thumb16(*(u16 *)loc);
lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2));
@@ -302,7 +314,11 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
offset = (offset ^ 0x8000) - 0x8000;
offset += sym->st_value;
- if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS)
+ if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL ||
+ ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVW_PREL_NC)
+ offset -= loc;
+ if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS ||
+ ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL)
offset >>= 16;
upper = (u16)((upper & 0xfbf0) |
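The ARM MOVW/MOVT cases above extract a 16-bit immediate scattered across the opcode's imm4 (bits 19:16) and imm12 (bits 11:0) fields, sign-extend it, adjust it, and write it back. In plain C, the decode mirrors the arithmetic visible in the hunk; the re-encode into the fields cleared by `tmp &= 0xfff0f000` is a sketch under that assumption (helper names are made up):

        #include <stdint.h>

        static int32_t movw_imm_decode(uint32_t opcode)
        {
                int32_t imm = ((opcode & 0xf0000) >> 4) | (opcode & 0xfff);
                return (imm ^ 0x8000) - 0x8000;         /* sign-extend 16 -> 32 bits */
        }

        static uint32_t movw_imm_encode(uint32_t opcode, int32_t imm)
        {
                opcode &= 0xfff0f000;                   /* clear imm4 and imm12 */
                return opcode | ((imm & 0xf000) << 4) | (imm & 0x0fff);
        }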
diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S
new file mode 100644
index 000000000000..fb53db78fe78
--- /dev/null
+++ b/arch/arm/kernel/phys2virt.S
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 1994-2002 Russell King
+ * Copyright (c) 2003, 2020 ARM Limited
+ * All Rights Reserved
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/page.h>
+
+#ifdef __ARMEB__
+#define LOW_OFFSET 0x4
+#define HIGH_OFFSET 0x0
+#else
+#define LOW_OFFSET 0x0
+#define HIGH_OFFSET 0x4
+#endif
+
+/*
+ * __fixup_pv_table - patch the stub instructions with the delta between
+ * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be
+ * 2 MiB aligned.
+ *
+ * Called from head.S, which expects the following registers to be preserved:
+ * r1 = machine no, r2 = atags or dtb,
+ * r8 = phys_offset, r9 = cpuid, r10 = procinfo
+ */
+ __HEAD
+ENTRY(__fixup_pv_table)
+ mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN
+ str_l r0, __pv_phys_pfn_offset, r3
+
+ adr_l r0, __pv_offset
+ subs r3, r8, #PAGE_OFFSET @ PHYS_OFFSET - PAGE_OFFSET
+ mvn ip, #0
+ strcc ip, [r0, #HIGH_OFFSET] @ save to __pv_offset high bits
+ str r3, [r0, #LOW_OFFSET] @ save to __pv_offset low bits
+
+ mov r0, r3, lsr #21 @ constant for add/sub instructions
+ teq r3, r0, lsl #21 @ must be 2 MiB aligned
+ bne 0f
+
+ adr_l r4, __pv_table_begin
+ adr_l r5, __pv_table_end
+ b __fixup_a_pv_table
+
+0: mov r0, r0 @ deadloop on error
+ b 0b
+ENDPROC(__fixup_pv_table)
+
+ .text
+__fixup_a_pv_table:
+ adr_l r6, __pv_offset
+ ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word
+ ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word
+ cmn r0, #1
+#ifdef CONFIG_THUMB2_KERNEL
+ @
+ @ The Thumb-2 versions of the patchable sequences are
+ @
+ @ phys-to-virt: movw <reg>, #offset<31:21>
+ @ lsl <reg>, #21
+ @ sub <VA>, <PA>, <reg>
+ @
+ @ virt-to-phys (non-LPAE): movw <reg>, #offset<31:21>
+ @ lsl <reg>, #21
+ @ add <PA>, <VA>, <reg>
+ @
+ @ virt-to-phys (LPAE): movw <reg>, #offset<31:21>
+ @ lsl <reg>, #21
+ @ adds <PAlo>, <VA>, <reg>
+ @ mov <PAhi>, #offset<39:32>
+ @ adc <PAhi>, <PAhi>, #0
+ @
+ @ In the non-LPAE case, all patchable instructions are MOVW
+ @ instructions, where we need to patch in the offset into the
+ @ second halfword of the opcode (the 16-bit immediate is encoded
+ @ as imm4:i:imm3:imm8)
+ @
+ @ 15 11 10 9 4 3 0 15 14 12 11 8 7 0
+ @ +-----------+---+-------------+------++---+------+----+------+
+ @ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 |
+ @ +-----------+---+-------------+------++---+------+----+------+
+ @
+ @ In the LPAE case, we also need to patch in the high word of the
+ @ offset into the immediate field of the MOV instruction, or patch it
+ @ to a MVN instruction if the offset is negative. In this case, we
+ @ need to inspect the first halfword of the opcode, to check whether
+ @ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if
+ @ needed. The encoding of the immediate is rather complex for values
+ @ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower
+ @ order bits, which can be patched into imm8 directly (and i:imm3
+ @ cleared)
+ @
+ @ 15 11 10 9 5 0 15 14 12 11 8 7 0
+ @ +-----------+---+---------------------++---+------+----+------+
+ @ MOV | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
+ @ MVN | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
+ @ +-----------+---+---------------------++---+------+----+------+
+ @
+ moveq r0, #0x200000 @ set bit 21, mov to mvn instruction
+ lsrs r3, r6, #29 @ isolate top 3 bits of displacement
+ ubfx r6, r6, #21, #8 @ put bits 28:21 into the MOVW imm8 field
+ bfi r6, r3, #12, #3 @ put bits 31:29 into the MOVW imm3 field
+ b .Lnext
+.Lloop: add r7, r4
+ adds r4, #4 @ clears Z flag
+#ifdef CONFIG_ARM_LPAE
+ ldrh ip, [r7]
+ARM_BE8(rev16 ip, ip)
+ tst ip, #0x200 @ MOVW has bit 9 set, MVN has it clear
+ bne 0f @ skip to MOVW handling (Z flag is clear)
+ bic ip, #0x20 @ clear bit 5 (MVN -> MOV)
+ orr ip, ip, r0, lsr #16 @ MOV -> MVN if offset < 0
+ARM_BE8(rev16 ip, ip)
+ strh ip, [r7]
+ @ Z flag is set
+0:
+#endif
+ ldrh ip, [r7, #2]
+ARM_BE8(rev16 ip, ip)
+ and ip, #0xf00 @ clear everything except Rd field
+ orreq ip, r0 @ Z flag set -> MOV/MVN -> patch in high bits
+ orrne ip, r6 @ Z flag clear -> MOVW -> patch in low bits
+ARM_BE8(rev16 ip, ip)
+ strh ip, [r7, #2]
+#else
+#ifdef CONFIG_CPU_ENDIAN_BE8
+@ in BE8, we load data in BE, but instructions still in LE
+#define PV_BIT24 0x00000001
+#define PV_IMM8_MASK 0xff000000
+#define PV_IMMR_MSB 0x00080000
+#else
+#define PV_BIT24 0x01000000
+#define PV_IMM8_MASK 0x000000ff
+#define PV_IMMR_MSB 0x00000800
+#endif
+
+ @
+ @ The ARM versions of the patchable sequences are
+ @
+ @ phys-to-virt: sub <VA>, <PA>, #offset<31:24>, lsl #24
+ @ sub <VA>, <PA>, #offset<23:16>, lsl #16
+ @
+ @ virt-to-phys (non-LPAE): add <PA>, <VA>, #offset<31:24>, lsl #24
+ @ add <PA>, <VA>, #offset<23:16>, lsl #16
+ @
+ @ virt-to-phys (LPAE): movw <reg>, #offset<31:20>
+ @ adds <PAlo>, <VA>, <reg>, lsl #20
+ @ mov <PAhi>, #offset<39:32>
+ @ adc <PAhi>, <PAhi>, #0
+ @
+ @ In the non-LPAE case, all patchable instructions are ADD or SUB
+ @ instructions, where we need to patch in the offset into the
+ @ immediate field of the opcode, which is emitted with the correct
+ @ rotation value. (The effective value of the immediate is imm12<7:0>
+ @ rotated right by [2 * imm12<11:8>] bits)
+ @
+ @ 31 28 27 23 22 20 19 16 15 12 11 0
+ @ +------+-----------------+------+------+-------+
+ @ ADD | cond | 0 0 1 0 1 0 0 0 | Rn | Rd | imm12 |
+ @ SUB | cond | 0 0 1 0 0 1 0 0 | Rn | Rd | imm12 |
+ @ MOV | cond | 0 0 1 1 1 0 1 0 | Rn | Rd | imm12 |
+ @ MVN | cond | 0 0 1 1 1 1 1 0 | Rn | Rd | imm12 |
+ @ +------+-----------------+------+------+-------+
+ @
+ @ In the LPAE case, we use a MOVW instruction to carry the low offset
+ @ word, and patch in the high word of the offset into the immediate
+ @ field of the subsequent MOV instruction, or patch it to a MVN
+ @ instruction if the offset is negative. We can distinguish MOVW
+ @ instructions based on bits 23:22 of the opcode, and ADD/SUB can be
+ @ distinguished from MOV/MVN (all using the encodings above) using
+ @ bit 24.
+ @
+ @ 31 28 27 23 22 20 19 16 15 12 11 0
+ @ +------+-----------------+------+------+-------+
+ @ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 | Rd | imm12 |
+ @ +------+-----------------+------+------+-------+
+ @
+ moveq r0, #0x400000 @ set bit 22, mov to mvn instruction
+ mov r3, r6, lsr #16 @ put offset bits 31-16 into r3
+ mov r6, r6, lsr #24 @ put offset bits 31-24 into r6
+ and r3, r3, #0xf0 @ only keep offset bits 23-20 in r3
+ b .Lnext
+.Lloop: ldr ip, [r7, r4]
+#ifdef CONFIG_ARM_LPAE
+ tst ip, #PV_BIT24 @ ADD/SUB have bit 24 clear
+ beq 1f
+ARM_BE8(rev ip, ip)
+ tst ip, #0xc00000 @ MOVW has bits 23:22 clear
+ bic ip, ip, #0x400000 @ clear bit 22
+ bfc ip, #0, #12 @ clear imm12 field of MOV[W] instruction
+ orreq ip, ip, r6, lsl #4 @ MOVW -> mask in offset bits 31-24
+ orreq ip, ip, r3, lsr #4 @ MOVW -> mask in offset bits 23-20
+ orrne ip, ip, r0 @ MOV -> mask in offset bits 7-0 (or bit 22)
+ARM_BE8(rev ip, ip)
+ b 2f
+1:
+#endif
+ tst ip, #PV_IMMR_MSB @ rotation value >= 16 ?
+ bic ip, ip, #PV_IMM8_MASK
+ orreq ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24
+ orrne ip, ip, r3 ARM_BE8(, lsl #24) @ mask in offset bits 23-20
+2:
+ str ip, [r7, r4]
+ add r4, r4, #4
+#endif
+
+.Lnext:
+ cmp r4, r5
+ ldrcc r7, [r4] @ use branch for delay slot
+ bcc .Lloop
+ ret lr
+ENDPROC(__fixup_a_pv_table)
+
+ENTRY(fixup_pv_table)
+ stmfd sp!, {r4 - r7, lr}
+ mov r4, r0 @ r0 = table start
+ add r5, r0, r1 @ r1 = table size
+ bl __fixup_a_pv_table
+ ldmfd sp!, {r4 - r7, pc}
+ENDPROC(fixup_pv_table)
+
+ .data
+ .align 2
+ .globl __pv_phys_pfn_offset
+ .type __pv_phys_pfn_offset, %object
+__pv_phys_pfn_offset:
+ .word 0
+ .size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset
+
+ .globl __pv_offset
+ .type __pv_offset, %object
+__pv_offset:
+ .quad 0
+ .size __pv_offset, . -__pv_offset
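A footnote on the rotated-immediate encoding referenced in the phys2virt.S comments: an ARM modified immediate is imm12<7:0> rotated right by 2 * imm12<11:8> bits. A small C decoder for that encoding (illustrative, assuming the A32 encoding described in the comments above):

        #include <stdint.h>

        static uint32_t arm_imm12_decode(uint32_t imm12)
        {
                uint32_t val = imm12 & 0xff;
                unsigned int rot = 2 * ((imm12 >> 8) & 0xf);

                /* rotate right; '& 31' keeps the shift defined when rot == 0 */
                return (val >> rot) | (val << ((32 - rot) & 31));
        }

For instance, imm12 = 0x481 decodes to 0x81000000, which is exactly the __PV_BITS_31_24 stub constant, and imm12 = 0x881 decodes to 0x810000 (__PV_BITS_23_16).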