Diffstat (limited to 'arch/x86/boot/compressed/head_64.S')
 -rw-r--r--   arch/x86/boot/compressed/head_64.S   | 406
 1 file changed, 279 insertions, 127 deletions
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 06e71c2c16bf..d9dab940ff62 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * linux/boot/head.S
  *
@@ -31,10 +32,54 @@
 #include <asm/msr.h>
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
+#include <asm/desc_defs.h>
+#include <asm/trapnr.h>
+
+/*
+ * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result
+ * in assembly errors due to trying to move .org backward due to the excessive
+ * alignment.
+ */
+#undef __ALIGN
+#define __ALIGN		.balign 16, 0x90
+
+/*
+ * Locally defined symbols should be marked hidden:
+ */
+	.hidden _bss
+	.hidden _ebss
+	.hidden _end

 	__HEAD
+
+/*
+ * This macro gives the relative virtual address of X, i.e. the offset of X
+ * from startup_32. This is the same as the link-time virtual address of X,
+ * since startup_32 is at 0, but defining it this way tells the
+ * assembler/linker that we do not want the actual run-time address of X. This
+ * prevents the linker from trying to create unwanted run-time relocation
+ * entries for the reference when the compressed kernel is linked as PIE.
+ *
+ * A reference X(%reg) will result in the link-time VA of X being stored with
+ * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that
+ * adds the 64-bit base address where the kernel is loaded.
+ *
+ * Replacing it with (X-startup_32)(%reg) results in the offset being stored,
+ * and no run-time relocation.
+ *
+ * The macro should be used as a displacement with a base register containing
+ * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate
+ * [$ rva(X)].
+ *
+ * This macro can only be used from within the .head.text section, since the
+ * expression requires startup_32 to be in the same section as the code being
+ * assembled.
+ */
+#define rva(X) ((X) - startup_32)

 	.code32
-ENTRY(startup_32)
+SYM_FUNC_START(startup_32)
 /*
  * 32bit entry is 0 and it is ABI so immutable!
  * If we come here directly from a bootloader,
@@ -42,19 +87,7 @@ ENTRY(startup_32)
  * all need to be under the 4G limit.
  */
 	cld
-	/*
-	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
-	 * us to not reload segments
-	 */
-	testb $(1<<6), BP_loadflags(%esi)
-	jnz 1f
-	cli
-	movl $(__BOOT_DS), %eax
-	movl %eax, %ds
-	movl %eax, %es
-	movl %eax, %ss
-1:

 /*
  * Calculate the delta between where we were compiled to run
@@ -67,16 +100,39 @@ ENTRY(startup_32)
 	leal (BP_scratch+4)(%esi), %esp
 	call 1f
 1:	popl %ebp
-	subl $1b, %ebp
+	subl $ rva(1b), %ebp
+
+	/* Load new GDT with the 64bit segments using 32bit descriptor */
+	leal rva(gdt)(%ebp), %eax
+	movl %eax, 2(%eax)
+	lgdt (%eax)
+
+	/* Load segment registers with our descriptors */
+	movl $__BOOT_DS, %eax
+	movl %eax, %ds
+	movl %eax, %es
+	movl %eax, %fs
+	movl %eax, %gs
+	movl %eax, %ss
+
+	/* Setup a stack and load CS from current GDT */
+	leal rva(boot_stack_end)(%ebp), %esp

-/* setup a stack and make sure cpu supports long mode. */
-	movl $boot_stack_end, %eax
-	addl %ebp, %eax
-	movl %eax, %esp
+	pushl $__KERNEL32_CS
+	leal rva(1f)(%ebp), %eax
+	pushl %eax
+	lretl
+1:
+
+	/* Setup Exception handling for SEV-ES */
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+	call startup32_load_idt
+#endif
+
+	/* Make sure cpu supports long mode. */
 	call verify_cpu
 	testl %eax, %eax
-	jnz no_longmode
+	jnz .Lno_longmode

 /*
  * Compute the delta between where we were compiled to run at
@@ -94,62 +150,88 @@ ENTRY(startup_32)
 	addl %eax, %ebx
 	notl %eax
 	andl %eax, %ebx
-#else
-	movl $LOAD_PHYSICAL_ADDR, %ebx
+	cmpl $LOAD_PHYSICAL_ADDR, %ebx
+	jae 1f
 #endif
+	movl $LOAD_PHYSICAL_ADDR, %ebx
+1:

 	/* Target address to relocate to for decompression */
-	addl $z_extract_offset, %ebx
+	addl BP_init_size(%esi), %ebx
+	subl $ rva(_end), %ebx

 /*
  * Prepare for entering 64 bit mode
  */

-	/* Load new GDT with the 64bit segments using 32bit descriptor */
-	leal gdt(%ebp), %eax
-	movl %eax, gdt+2(%ebp)
-	lgdt gdt(%ebp)
-
 	/* Enable PAE mode */
-	movl $(X86_CR4_PAE), %eax
+	movl %cr4, %eax
+	orl $X86_CR4_PAE, %eax
 	movl %eax, %cr4

 /*
  * Build early 4G boot pagetable
  */
+	/*
+	 * If SEV is active then set the encryption mask in the page tables.
+	 * This will ensure that when the kernel is copied and decompressed
+	 * it will be done so encrypted.
+	 */
+	xorl %edx, %edx
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+	call get_sev_encryption_bit
+	xorl %edx, %edx
+	testl %eax, %eax
+	jz 1f
+	subl $32, %eax	/* Encryption bit is always above bit 31 */
+	bts %eax, %edx	/* Set encryption mask for page tables */
+	/*
+	 * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that
+	 * startup32_check_sev_cbit() will do a check. sev_enable() will
+	 * initialize sev_status with all the bits reported by
+	 * MSR_AMD_SEV_STATUS later, but only MSR_AMD64_SEV_ENABLED_BIT
+	 * needs to be set for now.
+	 */
+	movl $1, rva(sev_status)(%ebp)
+1:
+#endif
+
 	/* Initialize Page tables to 0 */
-	leal pgtable(%ebx), %edi
+	leal rva(pgtable)(%ebx), %edi
 	xorl %eax, %eax
-	movl $((4096*6)/4), %ecx
+	movl $(BOOT_INIT_PGT_SIZE/4), %ecx
 	rep stosl

 	/* Build Level 4 */
-	leal pgtable + 0(%ebx), %edi
+	leal rva(pgtable + 0)(%ebx), %edi
 	leal 0x1007 (%edi), %eax
 	movl %eax, 0(%edi)
+	addl %edx, 4(%edi)

 	/* Build Level 3 */
-	leal pgtable + 0x1000(%ebx), %edi
+	leal rva(pgtable + 0x1000)(%ebx), %edi
 	leal 0x1007(%edi), %eax
 	movl $4, %ecx
 1:	movl %eax, 0x00(%edi)
+	addl %edx, 0x04(%edi)
 	addl $0x00001000, %eax
 	addl $8, %edi
 	decl %ecx
 	jnz 1b

 	/* Build Level 2 */
-	leal pgtable + 0x2000(%ebx), %edi
+	leal rva(pgtable + 0x2000)(%ebx), %edi
 	movl $0x00000183, %eax
 	movl $2048, %ecx
 1:	movl %eax, 0(%edi)
+	addl %edx, 4(%edi)
 	addl $0x00200000, %eax
 	addl $8, %edi
 	decl %ecx
 	jnz 1b

 	/* Enable the boot page tables */
-	leal pgtable(%ebx), %eax
+	leal rva(pgtable)(%ebx), %eax
 	movl %eax, %cr3

 	/* Enable Long mode in EFER (Extended Feature Enable Register) */
@@ -161,34 +243,39 @@ ENTRY(startup_32)
 	/* After gdt is loaded */
 	xorl %eax, %eax
 	lldt %ax
-	movl $0x20, %eax
+	movl $__BOOT_TSS, %eax
 	ltr %ax
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+	/* Check if the C-bit position is correct when SEV is active */
+	call startup32_check_sev_cbit
+#endif
+
 	/*
 	 * Setup for the jump to 64bit mode
 	 *
-	 * When the jump is performend we will be in long mode but
+	 * When the jump is performed we will be in long mode but
 	 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
 	 * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
 	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
 	 * We place all of the values on our mini stack so lret can
 	 * used to perform that far jump.
 	 */
+	leal rva(startup_64)(%ebp), %eax
 	pushl $__KERNEL_CS
-	leal startup_64(%ebp), %eax
 	pushl %eax

 	/* Enter paged protected Mode, activating Long Mode */
-	movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
+	movl $CR0_STATE, %eax
 	movl %eax, %cr0

 	/* Jump from 32bit compatibility mode into 64bit mode. */
 	lret
-ENDPROC(startup_32)
+SYM_FUNC_END(startup_32)

 	.code64
 	.org 0x200
-ENTRY(startup_64)
+SYM_CODE_START(startup_64)
 /*
  * 64bit entry is 0x200 and it is ABI so immutable!
  * We come here either from startup_32 or directly from a
@@ -199,46 +286,9 @@ ENTRY(startup_64)
  * that maps our entire kernel(text+data+bss+brk), zero page
  * and command line.
  */
-#ifdef CONFIG_EFI_STUB
-	/*
-	 * The entry point for the PE/COFF executable is efi_pe_entry, so
-	 * only legacy boot loaders will execute this jmp.
-	 */
-	jmp preferred_addr
-
-ENTRY(efi_pe_entry)
-	mov %rcx, %rdi
-	mov %rdx, %rsi
-	pushq %rdi
-	pushq %rsi
-	call make_boot_params
-	cmpq $0,%rax
-	je 1f
-	mov %rax, %rdx
-	popq %rsi
-	popq %rdi
-
-ENTRY(efi_stub_entry)
-	call efi_main
-	movq %rax,%rsi
-	cmpq $0,%rax
-	jne 2f
-1:
-	/* EFI init failed, so hang. */
-	hlt
-	jmp 1b
-2:
-	call 3f
-3:
-	popq %rax
-	subq $3b, %rax
-	subq BP_pref_address(%rsi), %rax
-	add BP_code32_start(%esi), %eax
-	leaq preferred_addr(%rax), %rax
-	jmp *%rax
-preferred_addr:
-#endif
+	cld
+	cli

 	/* Setup data segments. */
 	xorl %eax, %eax
@@ -269,15 +319,94 @@ preferred_addr:
 	addq %rax, %rbp
 	notq %rax
 	andq %rax, %rbp
-#else
-	movq $LOAD_PHYSICAL_ADDR, %rbp
+	cmpq $LOAD_PHYSICAL_ADDR, %rbp
+	jae 1f
 #endif
+	movq $LOAD_PHYSICAL_ADDR, %rbp
+1:

 	/* Target address to relocate to for decompression */
-	leaq z_extract_offset(%rbp), %rbx
+	movl BP_init_size(%rsi), %ebx
+	subl $ rva(_end), %ebx
+	addq %rbp, %rbx

 	/* Set up the stack */
-	leaq boot_stack_end(%rbx), %rsp
+	leaq rva(boot_stack_end)(%rbx), %rsp
+
+	/*
+	 * At this point we are in long mode with 4-level paging enabled,
+	 * but we might want to enable 5-level paging or vice versa.
+	 *
+	 * The problem is that we cannot do it directly. Setting or clearing
+	 * CR4.LA57 in long mode would trigger #GP. So we need to switch off
+	 * long mode and paging first.
+	 *
+	 * We also need a trampoline in lower memory to switch over from
+	 * 4- to 5-level paging for cases when the bootloader puts the kernel
+	 * above 4G, but didn't enable 5-level paging for us.
+	 *
+	 * The same trampoline can be used to switch from 5- to 4-level paging
+	 * mode, like when starting 4-level paging kernel via kexec() when
+	 * original kernel worked in 5-level paging mode.
+	 *
+	 * For the trampoline, we need the top page table to reside in lower
+	 * memory as we don't have a way to load 64-bit values into CR3 in
+	 * 32-bit mode.
+	 */
+
+	/* Make sure we have GDT with 32-bit code segment */
+	leaq gdt64(%rip), %rax
+	addq %rax, 2(%rax)
+	lgdt (%rax)
+
+	/* Reload CS so IRET returns to a CS actually in the GDT */
+	pushq $__KERNEL_CS
+	leaq .Lon_kernel_cs(%rip), %rax
+	pushq %rax
+	lretq
+
+.Lon_kernel_cs:
+	/*
+	 * RSI holds a pointer to a boot_params structure provided by the
+	 * loader, and this needs to be preserved across C function calls. So
+	 * move it into a callee saved register.
+	 */
+	movq %rsi, %r15
+
+	call load_stage1_idt
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+	/*
+	 * Now that the stage1 interrupt handlers are set up, #VC exceptions from
+	 * CPUID instructions can be properly handled for SEV-ES guests.
+	 *
+	 * For SEV-SNP, the CPUID table also needs to be set up in advance of any
+	 * CPUID instructions being issued, so go ahead and do that now via
+	 * sev_enable(), which will also handle the rest of the SEV-related
+	 * detection/setup to ensure that has been done in advance of any dependent
+	 * code. Pass the boot_params pointer as the first argument.
+	 */
+	movq %r15, %rdi
+	call sev_enable
+#endif
+
+	/* Preserve only the CR4 bits that must be preserved, and clear the rest */
+	movq %cr4, %rax
+	andl $(X86_CR4_PAE | X86_CR4_MCE | X86_CR4_LA57), %eax
+	movq %rax, %cr4
+
+	/*
+	 * configure_5level_paging() updates the number of paging levels using
+	 * a trampoline in 32-bit addressable memory if the current number does
+	 * not match the desired number.
+	 *
+	 * Pass the boot_params pointer as the first argument. The second
+	 * argument is the relocated address of the page table to use instead
+	 * of the page table in trampoline memory (if required).
+	 */
+	movq %r15, %rdi
+	leaq rva(top_pgtable)(%rbx), %rsi
+	call configure_5level_paging

 	/* Zero EFLAGS */
 	pushq $0
@@ -287,24 +416,33 @@ preferred_addr:
 	 * Copy the compressed kernel to the end of our buffer
 	 * where decompression in place becomes safe.
 	 */
-	pushq %rsi
 	leaq (_bss-8)(%rip), %rsi
-	leaq (_bss-8)(%rbx), %rdi
-	movq $_bss /* - $startup_32 */, %rcx
-	shrq $3, %rcx
+	leaq rva(_bss-8)(%rbx), %rdi
+	movl $(_bss - startup_32), %ecx
+	shrl $3, %ecx
 	std
 	rep movsq
 	cld
-	popq %rsi
+
+	/*
+	 * The GDT may get overwritten either during the copy we just did or
+	 * during extract_kernel below. To avoid any issues, repoint the GDTR
+	 * to the new copy of the GDT.
+	 */
+	leaq rva(gdt64)(%rbx), %rax
+	leaq rva(gdt)(%rbx), %rdx
+	movq %rdx, 2(%rax)
+	lgdt (%rax)

 /*
  * Jump to the relocated address.
  */
-	leaq relocated(%rbx), %rax
+	leaq rva(.Lrelocated)(%rbx), %rax
 	jmp *%rax
+SYM_CODE_END(startup_64)

 	.text
-relocated:
+SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)

 /*
  * Clear BSS (stack is currently empty)
@@ -316,72 +454,86 @@ relocated:
 	shrq $3, %rcx
 	rep stosq

+	call load_stage2_idt
+
+	/* Pass boot_params to initialize_identity_maps() */
+	movq %r15, %rdi
+	call initialize_identity_maps
+
 /*
- * Adjust our own GOT
- */
-	leaq _got(%rip), %rdx
-	leaq _egot(%rip), %rcx
-1:
-	cmpq %rcx, %rdx
-	jae 2f
-	addq %rbx, (%rdx)
-	addq $8, %rdx
-	jmp 1b
-2:
-
-/*
- * Do the decompression, and jump to the new kernel..
+ * Do the extraction, and jump to the new kernel..
  */
-	pushq %rsi			/* Save the real mode argument */
-	movq %rsi, %rdi			/* real mode address */
-	leaq boot_heap(%rip), %rsi	/* malloc area for uncompression */
-	leaq input_data(%rip), %rdx	/* input_data */
-	movl $z_input_len, %ecx		/* input_len */
-	movq %rbp, %r8			/* output target address */
-	call decompress_kernel
-	popq %rsi
+	/* pass struct boot_params pointer and output target address */
+	movq %r15, %rdi
+	movq %rbp, %rsi
+	call extract_kernel		/* returns kernel entry point in %rax */

 /*
  * Jump to the decompressed kernel.
  */
-	jmp *%rbp
+	movq %r15, %rsi
+	jmp *%rax
+SYM_FUNC_END(.Lrelocated)

 	.code32
-no_longmode:
-	/* This isn't an x86-64 CPU so hang */
+SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
+	/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
 1:
 	hlt
 	jmp 1b
+SYM_FUNC_END(.Lno_longmode)
+
+	.globl verify_cpu
 #include "../../kernel/verify_cpu.S"

 	.data
-gdt:
-	.word gdt_end - gdt
-	.long gdt
+SYM_DATA_START_LOCAL(gdt64)
+	.word gdt_end - gdt - 1
+	.quad gdt - gdt64
+SYM_DATA_END(gdt64)
+	.balign 8
+SYM_DATA_START_LOCAL(gdt)
+	.word gdt_end - gdt - 1
+	.long 0
 	.word 0
-	.quad 0x0000000000000000	/* NULL descriptor */
+	.quad 0x00cf9a000000ffff	/* __KERNEL32_CS */
 	.quad 0x00af9a000000ffff	/* __KERNEL_CS */
 	.quad 0x00cf92000000ffff	/* __KERNEL_DS */
 	.quad 0x0080890000000000	/* TS descriptor */
 	.quad 0x0000000000000000	/* TS continued */
-gdt_end:
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
+
+SYM_DATA_START(boot_idt_desc)
+	.word boot_idt_end - boot_idt - 1
+	.quad 0
+SYM_DATA_END(boot_idt_desc)
+	.balign 8
+SYM_DATA_START(boot_idt)
+	.rept BOOT_IDT_ENTRIES
+	.quad 0
+	.quad 0
+	.endr
+SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)

 /*
  * Stack and heap for uncompression
  */
 	.bss
 	.balign 4
-boot_heap:
-	.fill BOOT_HEAP_SIZE, 1, 0
-boot_stack:
+SYM_DATA_START_LOCAL(boot_stack)
 	.fill BOOT_STACK_SIZE, 1, 0
-boot_stack_end:
+	.balign 16
+SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)

 /*
  * Space for page tables (not in .bss so not zeroed)
  */
-	.section ".pgtable","a",@nobits
+	.section ".pgtable","aw",@nobits
 	.balign 4096
-pgtable:
-	.fill 6*4096, 1, 0
+SYM_DATA_LOCAL(pgtable, .fill BOOT_PGT_SIZE, 1, 0)
+
+/*
+ * The page table is going to be used instead of page table in the trampoline
+ * memory.
+ */
+SYM_DATA_LOCAL(top_pgtable, .fill PAGE_SIZE, 1, 0)
