summaryrefslogtreecommitdiff
path: root/arch/x86/boot/compressed/head_64.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/boot/compressed/head_64.S')
-rw-r--r--arch/x86/boot/compressed/head_64.S517
1 files changed, 242 insertions, 275 deletions
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fbf4c32d0b62..d9dab940ff62 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* linux/boot/head.S
*
@@ -32,18 +33,53 @@
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
+#include <asm/desc_defs.h>
+#include <asm/trapnr.h>
+
+/*
+ * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result
+ * in assembly errors due to trying to move .org backward due to the excessive
+ * alignment.
+ */
+#undef __ALIGN
+#define __ALIGN .balign 16, 0x90
/*
* Locally defined symbols should be marked hidden:
*/
.hidden _bss
.hidden _ebss
- .hidden _got
- .hidden _egot
+ .hidden _end
__HEAD
+
+/*
+ * This macro gives the relative virtual address of X, i.e. the offset of X
+ * from startup_32. This is the same as the link-time virtual address of X,
+ * since startup_32 is at 0, but defining it this way tells the
+ * assembler/linker that we do not want the actual run-time address of X. This
+ * prevents the linker from trying to create unwanted run-time relocation
+ * entries for the reference when the compressed kernel is linked as PIE.
+ *
+ * A reference X(%reg) will result in the link-time VA of X being stored with
+ * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that
+ * adds the 64-bit base address where the kernel is loaded.
+ *
+ * Replacing it with (X-startup_32)(%reg) results in the offset being stored,
+ * and no run-time relocation.
+ *
+ * The macro should be used as a displacement with a base register containing
+ * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate
+ * [$ rva(X)].
+ *
+ * This macro can only be used from within the .head.text section, since the
+ * expression requires startup_32 to be in the same section as the code being
+ * assembled.
+ */
+#define rva(X) ((X) - startup_32)
+
.code32
-ENTRY(startup_32)
+SYM_FUNC_START(startup_32)
/*
* 32bit entry is 0 and it is ABI so immutable!
* If we come here directly from a bootloader,
@@ -51,19 +87,7 @@ ENTRY(startup_32)
* all need to be under the 4G limit.
*/
cld
- /*
- * Test KEEP_SEGMENTS flag to see if the bootloader is asking
- * us to not reload segments
- */
- testb $KEEP_SEGMENTS, BP_loadflags(%esi)
- jnz 1f
-
cli
- movl $(__BOOT_DS), %eax
- movl %eax, %ds
- movl %eax, %es
- movl %eax, %ss
-1:
/*
* Calculate the delta between where we were compiled to run
@@ -76,16 +100,39 @@ ENTRY(startup_32)
leal (BP_scratch+4)(%esi), %esp
call 1f
1: popl %ebp
- subl $1b, %ebp
+ subl $ rva(1b), %ebp
+
+ /* Load new GDT with the 64bit segments using 32bit descriptor */
+ leal rva(gdt)(%ebp), %eax
+ movl %eax, 2(%eax)
+ lgdt (%eax)
+
+ /* Load segment registers with our descriptors */
+ movl $__BOOT_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %fs
+ movl %eax, %gs
+ movl %eax, %ss
-/* setup a stack and make sure cpu supports long mode. */
- movl $boot_stack_end, %eax
- addl %ebp, %eax
- movl %eax, %esp
+ /* Setup a stack and load CS from current GDT */
+ leal rva(boot_stack_end)(%ebp), %esp
+ pushl $__KERNEL32_CS
+ leal rva(1f)(%ebp), %eax
+ pushl %eax
+ lretl
+1:
+
+ /* Setup Exception handling for SEV-ES */
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ call startup32_load_idt
+#endif
+
+ /* Make sure cpu supports long mode. */
call verify_cpu
testl %eax, %eax
- jnz no_longmode
+ jnz .Lno_longmode
/*
* Compute the delta between where we were compiled to run at
@@ -104,24 +151,19 @@ ENTRY(startup_32)
notl %eax
andl %eax, %ebx
cmpl $LOAD_PHYSICAL_ADDR, %ebx
- jge 1f
+ jae 1f
#endif
movl $LOAD_PHYSICAL_ADDR, %ebx
1:
/* Target address to relocate to for decompression */
- movl BP_init_size(%esi), %eax
- subl $_end, %eax
- addl %eax, %ebx
+ addl BP_init_size(%esi), %ebx
+ subl $ rva(_end), %ebx
/*
* Prepare for entering 64 bit mode
*/
- /* Load new GDT with the 64bit segments using 32bit descriptor */
- addl %ebp, gdt+2(%ebp)
- lgdt gdt(%ebp)
-
/* Enable PAE mode */
movl %cr4, %eax
orl $X86_CR4_PAE, %eax
@@ -130,39 +172,66 @@ ENTRY(startup_32)
/*
* Build early 4G boot pagetable
*/
+ /*
+ * If SEV is active then set the encryption mask in the page tables.
+ * This will ensure that when the kernel is copied and decompressed
+ * it will be done so encrypted.
+ */
+ xorl %edx, %edx
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ call get_sev_encryption_bit
+ xorl %edx, %edx
+ testl %eax, %eax
+ jz 1f
+ subl $32, %eax /* Encryption bit is always above bit 31 */
+ bts %eax, %edx /* Set encryption mask for page tables */
+ /*
+ * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that
+ * startup32_check_sev_cbit() will do a check. sev_enable() will
+ * initialize sev_status with all the bits reported by
+ * MSR_AMD_SEV_STATUS later, but only MSR_AMD64_SEV_ENABLED_BIT
+ * needs to be set for now.
+ */
+ movl $1, rva(sev_status)(%ebp)
+1:
+#endif
+
/* Initialize Page tables to 0 */
- leal pgtable(%ebx), %edi
+ leal rva(pgtable)(%ebx), %edi
xorl %eax, %eax
movl $(BOOT_INIT_PGT_SIZE/4), %ecx
rep stosl
/* Build Level 4 */
- leal pgtable + 0(%ebx), %edi
+ leal rva(pgtable + 0)(%ebx), %edi
leal 0x1007 (%edi), %eax
movl %eax, 0(%edi)
+ addl %edx, 4(%edi)
/* Build Level 3 */
- leal pgtable + 0x1000(%ebx), %edi
+ leal rva(pgtable + 0x1000)(%ebx), %edi
leal 0x1007(%edi), %eax
movl $4, %ecx
1: movl %eax, 0x00(%edi)
+ addl %edx, 0x04(%edi)
addl $0x00001000, %eax
addl $8, %edi
decl %ecx
jnz 1b
/* Build Level 2 */
- leal pgtable + 0x2000(%ebx), %edi
+ leal rva(pgtable + 0x2000)(%ebx), %edi
movl $0x00000183, %eax
movl $2048, %ecx
1: movl %eax, 0(%edi)
+ addl %edx, 4(%edi)
addl $0x00200000, %eax
addl $8, %edi
decl %ecx
jnz 1b
/* Enable the boot page tables */
- leal pgtable(%ebx), %eax
+ leal rva(pgtable)(%ebx), %eax
movl %eax, %cr3
/* Enable Long mode in EFER (Extended Feature Enable Register) */
@@ -177,62 +246,36 @@ ENTRY(startup_32)
movl $__BOOT_TSS, %eax
ltr %ax
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /* Check if the C-bit position is correct when SEV is active */
+ call startup32_check_sev_cbit
+#endif
+
/*
* Setup for the jump to 64bit mode
*
- * When the jump is performend we will be in long mode but
+ * When the jump is performed we will be in long mode but
* in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
* (and in turn EFER.LMA = 1). To jump into 64bit mode we use
* the new gdt/idt that has __KERNEL_CS with CS.L = 1.
* We place all of the values on our mini stack so lret can
* used to perform that far jump.
*/
+ leal rva(startup_64)(%ebp), %eax
pushl $__KERNEL_CS
- leal startup_64(%ebp), %eax
-#ifdef CONFIG_EFI_MIXED
- movl efi32_config(%ebp), %ebx
- cmp $0, %ebx
- jz 1f
- leal handover_entry(%ebp), %eax
-1:
-#endif
pushl %eax
/* Enter paged protected Mode, activating Long Mode */
- movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
+ movl $CR0_STATE, %eax
movl %eax, %cr0
/* Jump from 32bit compatibility mode into 64bit mode. */
lret
-ENDPROC(startup_32)
-
-#ifdef CONFIG_EFI_MIXED
- .org 0x190
-ENTRY(efi32_stub_entry)
- add $0x4, %esp /* Discard return address */
- popl %ecx
- popl %edx
- popl %esi
-
- leal (BP_scratch+4)(%esi), %esp
- call 1f
-1: pop %ebp
- subl $1b, %ebp
-
- movl %ecx, efi32_config(%ebp)
- movl %edx, efi32_config+8(%ebp)
- sgdtl efi32_boot_gdt(%ebp)
-
- leal efi32_config(%ebp), %eax
- movl %eax, efi_config(%ebp)
-
- jmp startup_32
-ENDPROC(efi32_stub_entry)
-#endif
+SYM_FUNC_END(startup_32)
.code64
.org 0x200
-ENTRY(startup_64)
+SYM_CODE_START(startup_64)
/*
* 64bit entry is 0x200 and it is ABI so immutable!
* We come here either from startup_32 or directly from a
@@ -243,65 +286,9 @@ ENTRY(startup_64)
* that maps our entire kernel(text+data+bss+brk), zero page
* and command line.
*/
-#ifdef CONFIG_EFI_STUB
- /*
- * The entry point for the PE/COFF executable is efi_pe_entry, so
- * only legacy boot loaders will execute this jmp.
- */
- jmp preferred_addr
-
-ENTRY(efi_pe_entry)
- movq %rcx, efi64_config(%rip) /* Handle */
- movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */
-
- leaq efi64_config(%rip), %rax
- movq %rax, efi_config(%rip)
-
- call 1f
-1: popq %rbp
- subq $1b, %rbp
-
- /*
- * Relocate efi_config->call().
- */
- addq %rbp, efi64_config+40(%rip)
-
- movq %rax, %rdi
- call make_boot_params
- cmpq $0,%rax
- je fail
- mov %rax, %rsi
- leaq startup_32(%rip), %rax
- movl %eax, BP_code32_start(%rsi)
- jmp 2f /* Skip the relocation */
-
-handover_entry:
- call 1f
-1: popq %rbp
- subq $1b, %rbp
-
- /*
- * Relocate efi_config->call().
- */
- movq efi_config(%rip), %rax
- addq %rbp, 40(%rax)
-2:
- movq efi_config(%rip), %rdi
- call efi_main
- movq %rax,%rsi
- cmpq $0,%rax
- jne 2f
-fail:
- /* EFI init failed, so hang. */
- hlt
- jmp fail
-2:
- movl BP_code32_start(%esi), %eax
- leaq preferred_addr(%rax), %rax
- jmp *%rax
-preferred_addr:
-#endif
+ cld
+ cli
/* Setup data segments. */
xorl %eax, %eax
@@ -333,61 +320,94 @@ preferred_addr:
notq %rax
andq %rax, %rbp
cmpq $LOAD_PHYSICAL_ADDR, %rbp
- jge 1f
+ jae 1f
#endif
movq $LOAD_PHYSICAL_ADDR, %rbp
1:
/* Target address to relocate to for decompression */
movl BP_init_size(%rsi), %ebx
- subl $_end, %ebx
+ subl $ rva(_end), %ebx
addq %rbp, %rbx
/* Set up the stack */
- leaq boot_stack_end(%rbx), %rsp
-
-#ifdef CONFIG_X86_5LEVEL
- /* Check if 5-level paging has already enabled */
- movq %cr4, %rax
- testl $X86_CR4_LA57, %eax
- jnz lvl5
+ leaq rva(boot_stack_end)(%rbx), %rsp
/*
* At this point we are in long mode with 4-level paging enabled,
- * but we want to enable 5-level paging.
+ * but we might want to enable 5-level paging or vice versa.
*
- * The problem is that we cannot do it directly. Setting LA57 in
- * long mode would trigger #GP. So we need to switch off long mode
- * first.
+ * The problem is that we cannot do it directly. Setting or clearing
+ * CR4.LA57 in long mode would trigger #GP. So we need to switch off
+ * long mode and paging first.
*
- * NOTE: This is not going to work if bootloader put us above 4G
- * limit.
+ * We also need a trampoline in lower memory to switch over from
+ * 4- to 5-level paging for cases when the bootloader puts the kernel
+ * above 4G, but didn't enable 5-level paging for us.
*
- * The first step is go into compatibility mode.
+ * The same trampoline can be used to switch from 5- to 4-level paging
+ * mode, like when starting 4-level paging kernel via kexec() when
+ * original kernel worked in 5-level paging mode.
+ *
+ * For the trampoline, we need the top page table to reside in lower
+ * memory as we don't have a way to load 64-bit values into CR3 in
+ * 32-bit mode.
*/
- /* Clear additional page table */
- leaq lvl5_pgtable(%rbx), %rdi
- xorq %rax, %rax
- movq $(PAGE_SIZE/8), %rcx
- rep stosq
+ /* Make sure we have GDT with 32-bit code segment */
+ leaq gdt64(%rip), %rax
+ addq %rax, 2(%rax)
+ lgdt (%rax)
+
+ /* Reload CS so IRET returns to a CS actually in the GDT */
+ pushq $__KERNEL_CS
+ leaq .Lon_kernel_cs(%rip), %rax
+ pushq %rax
+ lretq
+.Lon_kernel_cs:
/*
- * Setup current CR3 as the first and only entry in a new top level
- * page table.
+ * RSI holds a pointer to a boot_params structure provided by the
+ * loader, and this needs to be preserved across C function calls. So
+ * move it into a callee saved register.
*/
- movq %cr3, %rdi
- leaq 0x7 (%rdi), %rax
- movq %rax, lvl5_pgtable(%rbx)
+ movq %rsi, %r15
- /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
- pushq $__KERNEL32_CS
- leaq compatible_mode(%rip), %rax
- pushq %rax
- lretq
-lvl5:
+ call load_stage1_idt
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /*
+ * Now that the stage1 interrupt handlers are set up, #VC exceptions from
+ * CPUID instructions can be properly handled for SEV-ES guests.
+ *
+ * For SEV-SNP, the CPUID table also needs to be set up in advance of any
+ * CPUID instructions being issued, so go ahead and do that now via
+ * sev_enable(), which will also handle the rest of the SEV-related
+ * detection/setup to ensure that has been done in advance of any dependent
+ * code. Pass the boot_params pointer as the first argument.
+ */
+ movq %r15, %rdi
+ call sev_enable
#endif
+ /* Preserve only the CR4 bits that must be preserved, and clear the rest */
+ movq %cr4, %rax
+ andl $(X86_CR4_PAE | X86_CR4_MCE | X86_CR4_LA57), %eax
+ movq %rax, %cr4
+
+ /*
+ * configure_5level_paging() updates the number of paging levels using
+ * a trampoline in 32-bit addressable memory if the current number does
+ * not match the desired number.
+ *
+ * Pass the boot_params pointer as the first argument. The second
+ * argument is the relocated address of the page table to use instead
+ * of the page table in trampoline memory (if required).
+ */
+ movq %r15, %rdi
+ leaq rva(top_pgtable)(%rbx), %rsi
+ call configure_5level_paging
+
/* Zero EFLAGS */
pushq $0
popfq
@@ -396,38 +416,33 @@ lvl5:
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
*/
- pushq %rsi
leaq (_bss-8)(%rip), %rsi
- leaq (_bss-8)(%rbx), %rdi
- movq $_bss /* - $startup_32 */, %rcx
- shrq $3, %rcx
+ leaq rva(_bss-8)(%rbx), %rdi
+ movl $(_bss - startup_32), %ecx
+ shrl $3, %ecx
std
rep movsq
cld
- popq %rsi
+
+ /*
+ * The GDT may get overwritten either during the copy we just did or
+ * during extract_kernel below. To avoid any issues, repoint the GDTR
+ * to the new copy of the GDT.
+ */
+ leaq rva(gdt64)(%rbx), %rax
+ leaq rva(gdt)(%rbx), %rdx
+ movq %rdx, 2(%rax)
+ lgdt (%rax)
/*
* Jump to the relocated address.
*/
- leaq relocated(%rbx), %rax
+ leaq rva(.Lrelocated)(%rbx), %rax
jmp *%rax
-
-#ifdef CONFIG_EFI_STUB
- .org 0x390
-ENTRY(efi64_stub_entry)
- movq %rdi, efi64_config(%rip) /* Handle */
- movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */
-
- leaq efi64_config(%rip), %rax
- movq %rax, efi_config(%rip)
-
- movq %rdx, %rsi
- jmp handover_entry
-ENDPROC(efi64_stub_entry)
-#endif
+SYM_CODE_END(startup_64)
.text
-relocated:
+SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
/*
* Clear BSS (stack is currently empty)
@@ -439,134 +454,86 @@ relocated:
shrq $3, %rcx
rep stosq
-/*
- * Adjust our own GOT
- */
- leaq _got(%rip), %rdx
- leaq _egot(%rip), %rcx
-1:
- cmpq %rcx, %rdx
- jae 2f
- addq %rbx, (%rdx)
- addq $8, %rdx
- jmp 1b
-2:
-
+ call load_stage2_idt
+
+ /* Pass boot_params to initialize_identity_maps() */
+ movq %r15, %rdi
+ call initialize_identity_maps
+
/*
* Do the extraction, and jump to the new kernel..
*/
- pushq %rsi /* Save the real mode argument */
- movq %rsi, %rdi /* real mode address */
- leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
- leaq input_data(%rip), %rdx /* input_data */
- movl $z_input_len, %ecx /* input_len */
- movq %rbp, %r8 /* output target address */
- movq $z_output_len, %r9 /* decompressed length, end of relocs */
- call extract_kernel /* returns kernel location in %rax */
- popq %rsi
+ /* pass struct boot_params pointer and output target address */
+ movq %r15, %rdi
+ movq %rbp, %rsi
+ call extract_kernel /* returns kernel entry point in %rax */
/*
* Jump to the decompressed kernel.
*/
+ movq %r15, %rsi
jmp *%rax
+SYM_FUNC_END(.Lrelocated)
.code32
-#ifdef CONFIG_X86_5LEVEL
-compatible_mode:
- /* Setup data and stack segments */
- movl $__KERNEL_DS, %eax
- movl %eax, %ds
- movl %eax, %ss
-
- /* Disable paging */
- movl %cr0, %eax
- btrl $X86_CR0_PG_BIT, %eax
- movl %eax, %cr0
-
- /* Point CR3 to 5-level paging */
- leal lvl5_pgtable(%ebx), %eax
- movl %eax, %cr3
-
- /* Enable PAE and LA57 mode */
- movl %cr4, %eax
- orl $(X86_CR4_PAE | X86_CR4_LA57), %eax
- movl %eax, %cr4
-
- /* Calculate address we are running at */
- call 1f
-1: popl %edi
- subl $1b, %edi
-
- /* Prepare stack for far return to Long Mode */
- pushl $__KERNEL_CS
- leal lvl5(%edi), %eax
- push %eax
-
- /* Enable paging back */
- movl $(X86_CR0_PG | X86_CR0_PE), %eax
- movl %eax, %cr0
-
- lret
-#endif
-
-no_longmode:
- /* This isn't an x86-64 CPU so hang */
+SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
+ /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1:
hlt
jmp 1b
+SYM_FUNC_END(.Lno_longmode)
+ .globl verify_cpu
#include "../../kernel/verify_cpu.S"
.data
-gdt:
- .word gdt_end - gdt
- .long gdt
+SYM_DATA_START_LOCAL(gdt64)
+ .word gdt_end - gdt - 1
+ .quad gdt - gdt64
+SYM_DATA_END(gdt64)
+ .balign 8
+SYM_DATA_START_LOCAL(gdt)
+ .word gdt_end - gdt - 1
+ .long 0
.word 0
.quad 0x00cf9a000000ffff /* __KERNEL32_CS */
.quad 0x00af9a000000ffff /* __KERNEL_CS */
.quad 0x00cf92000000ffff /* __KERNEL_DS */
.quad 0x0080890000000000 /* TS descriptor */
.quad 0x0000000000000000 /* TS continued */
-gdt_end:
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
-#ifdef CONFIG_EFI_STUB
-efi_config:
+SYM_DATA_START(boot_idt_desc)
+ .word boot_idt_end - boot_idt - 1
.quad 0
-
-#ifdef CONFIG_EFI_MIXED
- .global efi32_config
-efi32_config:
- .fill 5,8,0
- .quad efi64_thunk
- .byte 0
-#endif
-
- .global efi64_config
-efi64_config:
- .fill 5,8,0
- .quad efi_call
- .byte 1
-#endif /* CONFIG_EFI_STUB */
+SYM_DATA_END(boot_idt_desc)
+ .balign 8
+SYM_DATA_START(boot_idt)
+ .rept BOOT_IDT_ENTRIES
+ .quad 0
+ .quad 0
+ .endr
+SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
/*
* Stack and heap for uncompression
*/
.bss
.balign 4
-boot_heap:
- .fill BOOT_HEAP_SIZE, 1, 0
-boot_stack:
+SYM_DATA_START_LOCAL(boot_stack)
.fill BOOT_STACK_SIZE, 1, 0
-boot_stack_end:
+ .balign 16
+SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)
/*
* Space for page tables (not in .bss so not zeroed)
*/
- .section ".pgtable","a",@nobits
+ .section ".pgtable","aw",@nobits
.balign 4096
-pgtable:
- .fill BOOT_PGT_SIZE, 1, 0
-#ifdef CONFIG_X86_5LEVEL
-lvl5_pgtable:
- .fill PAGE_SIZE, 1, 0
-#endif
+SYM_DATA_LOCAL(pgtable, .fill BOOT_PGT_SIZE, 1, 0)
+
+/*
+ * The page table is going to be used instead of page table in the trampoline
+ * memory.
+ */
+SYM_DATA_LOCAL(top_pgtable, .fill PAGE_SIZE, 1, 0)