Diffstat (limited to 'arch/x86/kernel/relocate_kernel_64.S')
-rw-r--r-- | arch/x86/kernel/relocate_kernel_64.S | 503
1 file changed, 396 insertions(+), 107 deletions(-)
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 56cab1bb25f5..ea604f4d0b52 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -5,12 +5,15 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/stringify.h>
+#include <asm/alternative.h>
 #include <asm/page_types.h>
 #include <asm/kexec.h>
 #include <asm/processor-flags.h>
 #include <asm/pgtable_types.h>
 #include <asm/nospec-branch.h>
 #include <asm/unwind_hints.h>
+#include <asm/asm-offsets.h>
 
 /*
  * Must be relocatable PIC code callable as a C function, in particular
@@ -21,33 +24,47 @@
 #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 
 /*
- * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
- * ~ control_page + PAGE_SIZE are used as data storage and stack for
- * jumping back
+ * The .text..relocate_kernel and .data..relocate_kernel sections are copied
+ * into the control page, and the remainder of the page is used as the stack.
  */
-#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
 
+	.section .data..relocate_kernel,"a";
 /* Minimal CPU state */
-#define RSP			DATA(0x0)
-#define CR0			DATA(0x8)
-#define CR3			DATA(0x10)
-#define CR4			DATA(0x18)
-
-/* other data */
-#define CP_PA_TABLE_PAGE	DATA(0x20)
-#define CP_PA_SWAP_PAGE		DATA(0x28)
-#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)
-
-	.text
-	.align PAGE_SIZE
+SYM_DATA_LOCAL(saved_rsp, .quad 0)
+SYM_DATA_LOCAL(saved_cr0, .quad 0)
+SYM_DATA_LOCAL(saved_cr3, .quad 0)
+SYM_DATA_LOCAL(saved_cr4, .quad 0)
+	/* other data */
+SYM_DATA(kexec_va_control_page, .quad 0)
+SYM_DATA(kexec_pa_table_page, .quad 0)
+SYM_DATA(kexec_pa_swap_page, .quad 0)
+SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
+SYM_DATA(kexec_debug_8250_mmio32, .quad 0)
+SYM_DATA(kexec_debug_8250_port, .word 0)
+
+	.balign 16
+SYM_DATA_START_LOCAL(kexec_debug_gdt)
+	.word	kexec_debug_gdt_end - kexec_debug_gdt - 1
+	.long	0
+	.word	0
+	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
+	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
+	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
+SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end)
+
+	.balign 8
+SYM_DATA_START(kexec_debug_idt)
+	.skip 0x100, 0x00
+SYM_DATA_END(kexec_debug_idt)
+
+	.section .text..relocate_kernel,"ax";
 	.code64
-SYM_CODE_START_NOALIGN(relocate_range)
 SYM_CODE_START_NOALIGN(relocate_kernel)
 	UNWIND_HINT_END_OF_STACK
 	ANNOTATE_NOENDBR
 	/*
 	 * %rdi indirection_page
-	 * %rsi page_list
+	 * %rsi pa_control_page
 	 * %rdx start address
 	 * %rcx preserve_context
 	 * %r8  host_mem_enc_active
@@ -62,63 +79,87 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
 	pushq %r15
 	pushf
 
-	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
-	movq	%rsp, RSP(%r11)
-	movq	%cr0, %rax
-	movq	%rax, CR0(%r11)
-	movq	%cr3, %rax
-	movq	%rax, CR3(%r11)
-	movq	%cr4, %rax
-	movq	%rax, CR4(%r11)
-
-	/* Save CR4. Required to enable the right paging mode later. */
-	movq	%rax, %r13
+	/* Invalidate GDT/IDT, zero out flags */
+	pushq	$0
+	pushq	$0
 
-	/* zero out flags, and disable interrupts */
-	pushq $0
+	lidt	(%rsp)
+	lgdt	(%rsp)
+	addq	$8, %rsp
 	popfq
 
-	/* Save SME active flag */
-	movq	%r8, %r12
+	/* Switch to the identity mapped page tables */
+	movq	%cr3, %rax
+	movq	kexec_pa_table_page(%rip), %r9
+	movq	%r9, %cr3
 
-	/*
-	 * get physical address of control page now
-	 * this is impossible after page table switch
-	 */
-	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
+	/* Leave CR4 in %r13 to enable the right paging mode later. */
+	movq	%cr4, %r13
 
-	/* get physical address of page table now too */
-	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9
+	/* Disable global pages immediately to ensure this mapping is RWX */
+	movq	%r13, %r12
+	andq	$~(X86_CR4_PGE), %r12
+	movq	%r12, %cr4
 
-	/* get physical address of swap page now */
-	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10
+	/* Save %rsp and CRs. */
+	movq	%r13, saved_cr4(%rip)
+	movq	%rsp, saved_rsp(%rip)
+	movq	%rax, saved_cr3(%rip)
+	movq	%cr0, %rax
+	movq	%rax, saved_cr0(%rip)
 
-	/* save some information for jumping back */
-	movq	%r9, CP_PA_TABLE_PAGE(%r11)
-	movq	%r10, CP_PA_SWAP_PAGE(%r11)
-	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
+	/* save indirection list for jumping back */
+	movq	%rdi, pa_backup_pages_map(%rip)
 
-	/* Switch to the identity mapped page tables */
-	movq	%r9, %cr3
+	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
+	movq	%rcx, %r11
 
 	/* setup a new stack at the end of the physical control page */
-	lea	PAGE_SIZE(%r8), %rsp
+	lea	PAGE_SIZE(%rsi), %rsp
 
 	/* jump to identity mapped page */
-	addq	$(identity_mapped - relocate_kernel), %r8
-	pushq	%r8
-	ANNOTATE_UNRET_SAFE
-	ret
-	int3
+0:	addq	$identity_mapped - 0b, %rsi
+	subq	$__relocate_kernel_start - 0b, %rsi
+	ANNOTATE_RETPOLINE_SAFE
+	jmp	*%rsi
 SYM_CODE_END(relocate_kernel)
 
 SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
 	UNWIND_HINT_END_OF_STACK
-	/* set return address to 0 if not preserving context */
-	pushq	$0
+	/*
+	 * %rdi	indirection page
+	 * %rdx start address
+	 * %r8	host_mem_enc_active
+	 * %r9	page table page
+	 * %r11 preserve_context
+	 * %r13	original CR4 when relocate_kernel() was invoked
+	 */
+
 	/* store the start address on the stack */
 	pushq	%rdx
 
+	/* Create a GDTR (16 bits limit, 64 bits addr) on stack */
+	leaq	kexec_debug_gdt(%rip), %rax
+	pushq	%rax
+	pushw	(%rax)
+
+	/* Load the GDT, put the stack back */
+	lgdt	(%rsp)
+	addq	$10, %rsp
+
+	/* Test that we can load segments */
+	movq	%ds, %rax
+	movq	%rax, %ds
+
+	/* Now an IDTR on the stack to load the IDT the kernel created */
+	leaq	kexec_debug_idt(%rip), %rsi
+	pushq	%rsi
+	pushw	$0xff
+	lidt	(%rsp)
+	addq	$10, %rsp
+
+	//int3
+
 	/*
 	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
 	 * below.
@@ -145,16 +186,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
 	 * Set cr4 to a known state:
 	 *  - physical address extension enabled
 	 *  - 5-level paging, if it was enabled before
+	 *  - Machine check exception on TDX guest, if it was enabled before.
+	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
+	 *
+	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
+	 * PAE is always set in the original CR4.
 	 */
-	movl	$X86_CR4_PAE, %eax
-	testq	$X86_CR4_LA57, %r13
-	jz	1f
-	orl	$X86_CR4_LA57, %eax
-1:
-	movq	%rax, %cr4
-
-	jmp 1f
-1:
+	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
+	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
+	movq	%r13, %cr4
 
 	/* Flush the TLB (needed?) */
 	movq	%r9, %cr3
@@ -164,12 +204,11 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
 	 * entries that will conflict with the now unencrypted memory
 	 * used by kexec. Flush the caches before copying the kernel.
 	 */
-	testq	%r12, %r12
-	jz	1f
+	testq	%r8, %r8
+	jz	.Lsme_off
 	wbinvd
-1:
+.Lsme_off:
 
-	movq	%rcx, %r11
 	call	swap_pages
 
 	/*
@@ -181,13 +220,14 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
 	movq	%cr3, %rax
 	movq	%rax, %cr3
 
+	testq	%r11, %r11	/* preserve_context */
+	jnz	.Lrelocate
+
 	/*
 	 * set all of the registers to known values
 	 * leave %rsp alone
 	 */
 
-	testq	%r11, %r11
-	jnz	1f
 	xorl	%eax, %eax
 	xorl	%ebx, %ebx
 	xorl	%ecx, %ecx
@@ -208,22 +248,36 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
 	ret
 	int3
 
-1:
+.Lrelocate:
 	popq	%rdx
+
+	/* Use the swap page for the callee's stack */
+	movq	kexec_pa_swap_page(%rip), %r10
 	leaq	PAGE_SIZE(%r10), %rsp
+
+	/* push the existing entry point onto the callee's stack */
+	pushq	%rdx
+
 	ANNOTATE_RETPOLINE_SAFE
 	call	*%rdx
 
 	/* get the re-entry point of the peer system */
-	movq	0(%rsp), %rbp
-	leaq	relocate_kernel(%rip), %r8
-	movq	CP_PA_SWAP_PAGE(%r8), %r10
-	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
-	movq	CP_PA_TABLE_PAGE(%r8), %rax
+	popq	%rbp
+	movq	kexec_pa_swap_page(%rip), %r10
+	movq	pa_backup_pages_map(%rip), %rdi
+	movq	kexec_pa_table_page(%rip), %rax
 	movq	%rax, %cr3
+
+	/* Find start (and end) of this physical mapping of control page */
+	leaq	(%rip), %r8
+	ANNOTATE_NOENDBR
+	andq	$PAGE_MASK, %r8
 	lea	PAGE_SIZE(%r8), %rsp
+	movl	$1, %r11d	/* Ensure preserve_context flag is set */
 	call	swap_pages
-	movq	$virtual_mapped, %rax
+	movq	kexec_va_control_page(%rip), %rax
+0:	addq	$virtual_mapped - 0b, %rax
+	subq	$__relocate_kernel_start - 0b, %rax
 	pushq	%rax
 	ANNOTATE_UNRET_SAFE
 	ret
@@ -233,13 +287,21 @@ SYM_CODE_END(identity_mapped)
 SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
 	UNWIND_HINT_END_OF_STACK
 	ANNOTATE_NOENDBR // RET target, above
-	movq	RSP(%r8), %rsp
-	movq	CR4(%r8), %rax
+	movq	saved_rsp(%rip), %rsp
+	movq	saved_cr4(%rip), %rax
 	movq	%rax, %cr4
-	movq	CR3(%r8), %rax
-	movq	CR0(%r8), %r8
+	movq	saved_cr3(%rip), %rax
+	movq	saved_cr0(%rip), %r8
 	movq	%rax, %cr3
 	movq	%r8, %cr0
+
+#ifdef CONFIG_KEXEC_JUMP
+	/* Saved in save_processor_state. */
+	movq	$saved_context, %rax
+	lgdt	saved_context_gdt_desc(%rax)
+#endif
+
+	/* relocate_kernel() returns the re-entry point for next time */
 	movq	%rbp, %rax
 
 	popf
@@ -257,61 +319,288 @@ SYM_CODE_END(virtual_mapped)
 /* Do the copies */
 SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
 	UNWIND_HINT_END_OF_STACK
-	movq	%rdi, %rcx	/* Put the page_list in %rcx */
+	/*
+	 * %rdi indirection page
+	 * %r11 preserve_context
+	 */
+	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
 	xorl	%edi, %edi
 	xorl	%esi, %esi
-	jmp	1f
+	jmp	.Lstart		/* Should start with an indirection record */
 
-0:	/* top, read another word for the indirection page */
+.Lloop:	/* top, read another word for the indirection page */
 	movq	(%rbx), %rcx
 	addq	$8, %rbx
-1:
+.Lstart:
 	testb	$0x1, %cl	/* is it a destination page? */
-	jz	2f
+	jz	.Lnotdest
 	movq	%rcx, %rdi
 	andq	$0xfffffffffffff000, %rdi
-	jmp	0b
-2:
+	jmp	.Lloop
+.Lnotdest:
 	testb	$0x2, %cl	/* is it an indirection page? */
-	jz	2f
+	jz	.Lnotind
 	movq	%rcx, %rbx
 	andq	$0xfffffffffffff000, %rbx
-	jmp	0b
-2:
+	jmp	.Lloop
+.Lnotind:
 	testb	$0x4, %cl	/* is it the done indicator? */
-	jz	2f
-	jmp	3f
-2:
+	jz	.Lnotdone
+	jmp	.Ldone
+.Lnotdone:
 	testb	$0x8, %cl	/* is it the source indicator? */
-	jz	0b		/* Ignore it otherwise */
+	jz	.Lloop		/* Ignore it otherwise */
 	movq	%rcx, %rsi	/* For ever source page do a copy */
 	andq	$0xfffffffffffff000, %rsi
 
-	movq	%rdi, %rdx
-	movq	%rsi, %rax
+	movq	%rdi, %rdx	/* Save destination page to %rdx */
+	movq	%rsi, %rax	/* Save source page to %rax */
+
+	testq	%r11, %r11	/* Only actually swap for ::preserve_context */
+	jz	.Lnoswap
 
-	movq	%r10, %rdi
+	/* copy source page to swap page */
+	movq	kexec_pa_swap_page(%rip), %rdi
 	movl	$512, %ecx
-	rep ; movsq
+	rep movsq
 
+	/* copy destination page to source page */
 	movq	%rax, %rdi
 	movq	%rdx, %rsi
 	movl	$512, %ecx
-	rep ; movsq
+	rep movsq
 
+	/* copy swap page to destination page */
 	movq	%rdx, %rdi
-	movq	%r10, %rsi
+	movq	kexec_pa_swap_page(%rip), %rsi
+
+.Lnoswap:
 	movl	$512, %ecx
-	rep ; movsq
+	rep movsq
 
 	lea	PAGE_SIZE(%rax), %rsi
-	jmp	0b
-3:
+	jmp	.Lloop
+.Ldone:
 	ANNOTATE_UNRET_SAFE
 	ret
 	int3
 SYM_CODE_END(swap_pages)
 
-	.skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc
-SYM_CODE_END(relocate_range);
+/*
+ * Generic 'print character' routine
+ *  - %al: Character to be printed (may clobber %rax)
+ *  - %rdx: MMIO address or port.
+ */
+#define XMTRDY		0x20
+
+#define TXR		0	/* Transmit register (WRITE) */
+#define LSR		5	/* Line Status */
+
+SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250)
+	UNWIND_HINT_FUNC
+	ANNOTATE_NOENDBR
+	addw	$LSR, %dx
+	xchg	%al, %ah
+.Lxmtrdy_loop:
+	inb	%dx, %al
+	testb	$XMTRDY, %al
+	jnz	.Lready
+	pause
+	jmp	.Lxmtrdy_loop
+
+.Lready:
+	subw	$LSR, %dx
+	xchg	%al, %ah
+	outb	%al, %dx
+pr_char_null:
+	ANNOTATE_NOENDBR
+
+	ANNOTATE_UNRET_SAFE
+	ret
+SYM_CODE_END(pr_char_8250)
+
+SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250_mmio32)
+	UNWIND_HINT_FUNC
+	ANNOTATE_NOENDBR
+.Lxmtrdy_loop_mmio:
+	movb	(LSR*4)(%rdx), %ah
+	testb	$XMTRDY, %ah
+	jnz	.Lready_mmio
+	pause
+	jmp	.Lxmtrdy_loop_mmio
+
+.Lready_mmio:
+	movb	%al, (%rdx)
+	ANNOTATE_UNRET_SAFE
+	ret
+SYM_CODE_END(pr_char_8250_mmio32)
+
+/*
+ * Load pr_char function pointer into %rsi and load %rdx with whatever
+ * that function wants to see there (typically port/MMIO address).
+ */
+.macro pr_setup
+	leaq	pr_char_8250(%rip), %rsi
+	movw	kexec_debug_8250_port(%rip), %dx
+	testw	%dx, %dx
+	jnz	1f
+
+	leaq	pr_char_8250_mmio32(%rip), %rsi
+	movq	kexec_debug_8250_mmio32(%rip), %rdx
+	testq	%rdx, %rdx
+	jnz	1f
+
+	leaq	pr_char_null(%rip), %rsi
+1:
+.endm
+
+/* Print the nybble in %bl, clobber %rax */
+SYM_CODE_START_LOCAL_NOALIGN(pr_nybble)
+	UNWIND_HINT_FUNC
+	movb	%bl, %al
+	nop
+	andb	$0x0f, %al
+	addb	$0x30, %al
+	cmpb	$0x3a, %al
+	jb	1f
+	addb	$('a' - '0' - 10), %al
+	ANNOTATE_RETPOLINE_SAFE
+1:	jmp	*%rsi
+SYM_CODE_END(pr_nybble)
+
+SYM_CODE_START_LOCAL_NOALIGN(pr_qword)
+	UNWIND_HINT_FUNC
+	movq	$16, %rcx
+1:	rolq	$4, %rbx
+	call	pr_nybble
+	loop	1b
+	movb	$'\n', %al
+	ANNOTATE_RETPOLINE_SAFE
+	jmp	*%rsi
+SYM_CODE_END(pr_qword)
+
+.macro print_reg a, b, c, d, r
+	movb	$\a, %al
+	ANNOTATE_RETPOLINE_SAFE
+	call	*%rsi
+	movb	$\b, %al
+	ANNOTATE_RETPOLINE_SAFE
+	call	*%rsi
+	movb	$\c, %al
+	ANNOTATE_RETPOLINE_SAFE
+	call	*%rsi
+	movb	$\d, %al
+	ANNOTATE_RETPOLINE_SAFE
+	call	*%rsi
+	movq	\r, %rbx
+	call	pr_qword
+.endm
+
+SYM_CODE_START_NOALIGN(kexec_debug_exc_vectors)
+	/* Each of these is 6 bytes. */
+.macro vec_err exc
+	UNWIND_HINT_ENTRY
+	. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
+	nop
+	nop
+	pushq	$\exc
+	jmp	exc_handler
+.endm
+
+.macro vec_noerr exc
+	UNWIND_HINT_ENTRY
+	. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
+	pushq	$0
+	pushq	$\exc
+	jmp	exc_handler
+.endm
+
+	ANNOTATE_NOENDBR
+	vec_noerr 0 // #DE
+	vec_noerr 1 // #DB
+	vec_noerr 2 // #NMI
+	vec_noerr 3 // #BP
+	vec_noerr 4 // #OF
+	vec_noerr 5 // #BR
+	vec_noerr 6 // #UD
+	vec_noerr 7 // #NM
+	vec_err   8 // #DF
+	vec_noerr 9
+	vec_err  10 // #TS
+	vec_err  11 // #NP
+	vec_err  12 // #SS
+	vec_err  13 // #GP
+	vec_err  14 // #PF
+	vec_noerr 15
+SYM_CODE_END(kexec_debug_exc_vectors)
+
+SYM_CODE_START_LOCAL_NOALIGN(exc_handler)
+	/* No need for RET mitigations during kexec */
+	VALIDATE_UNRET_END
+
+	pushq	%rax
+	pushq	%rbx
+	pushq	%rcx
+	pushq	%rdx
+	pushq	%rsi
+
+	/* Stack frame */
+#define EXC_SS		0x58 /* Architectural... */
+#define EXC_RSP		0x50
+#define EXC_EFLAGS	0x48
+#define EXC_CS		0x40
+#define EXC_RIP		0x38
+#define EXC_ERRORCODE	0x30 /* Either architectural or zero pushed by handler */
+#define EXC_EXCEPTION	0x28 /* Pushed by handler entry point */
+#define EXC_RAX		0x20 /* Pushed just above in exc_handler */
+#define EXC_RBX		0x18
+#define EXC_RCX		0x10
+#define EXC_RDX		0x08
+#define EXC_RSI		0x00
+
+	/* Set up %rdx/%rsi for debug output */
+	pr_setup
+
+	/* rip and exception info */
+	print_reg 'E', 'x', 'c', ':', EXC_EXCEPTION(%rsp)
+	print_reg 'E', 'r', 'r', ':', EXC_ERRORCODE(%rsp)
+	print_reg 'r', 'i', 'p', ':', EXC_RIP(%rsp)
+	print_reg 'r', 's', 'p', ':', EXC_RSP(%rsp)
+
+	/* We spilled these to the stack */
+	print_reg 'r', 'a', 'x', ':', EXC_RAX(%rsp)
+	print_reg 'r', 'b', 'x', ':', EXC_RBX(%rsp)
+	print_reg 'r', 'c', 'x', ':', EXC_RCX(%rsp)
+	print_reg 'r', 'd', 'x', ':', EXC_RDX(%rsp)
+	print_reg 'r', 's', 'i', ':', EXC_RSI(%rsp)
+
+	/* Other registers untouched */
+	print_reg 'r', 'd', 'i', ':', %rdi
+	print_reg 'r', '8', ' ', ':', %r8
+	print_reg 'r', '9', ' ', ':', %r9
+	print_reg 'r', '1', '0', ':', %r10
+	print_reg 'r', '1', '1', ':', %r11
+	print_reg 'r', '1', '2', ':', %r12
+	print_reg 'r', '1', '3', ':', %r13
+	print_reg 'r', '1', '4', ':', %r14
+	print_reg 'r', '1', '5', ':', %r15
+	print_reg 'c', 'r', '2', ':', %cr2
+
+	/* Only return from INT3 */
+	cmpq	$3, EXC_EXCEPTION(%rsp)
+	jne	.Ldie
+
+	popq	%rsi
+	popq	%rdx
+	popq	%rcx
+	popq	%rbx
+	popq	%rax
+
+	addq	$16, %rsp
+	iretq
+
+.Ldie:
+	hlt
+	jmp	.Ldie
+
+SYM_CODE_END(exc_handler)
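
A note on the position-independent jump at the end of relocate_kernel(): the pair of instructions after the local label "0:" only ever uses differences between symbols, so the code needs no absolute relocations and lands correctly wherever the section was copied. A minimal C sketch of the arithmetic, with illustrative names (only identity_mapped and __relocate_kernel_start come from the patch):

#include <stdint.h>

/*
 * After "0: addq $identity_mapped - 0b, %rsi" and
 * "subq $__relocate_kernel_start - 0b, %rsi", the "0b" terms cancel, so
 * %rsi = pa_control_page + (identity_mapped - __relocate_kernel_start).
 */
static uint64_t pic_jump_target(uint64_t pa_control_page,       /* %rsi on entry */
				uint64_t va_identity_mapped,    /* link-time address */
				uint64_t va_relocate_kernel_start)
{
	/* offset of identity_mapped within the copied .text..relocate_kernel */
	uint64_t offset = va_identity_mapped - va_relocate_kernel_start;

	/* ...applied to the physical copy in the control page */
	return pa_control_page + offset;
}

The same trick reappears later for the jump back to virtual_mapped, this time based at kexec_va_control_page.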
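
The "pushq %rax; pushw (%rax); lgdt (%rsp); addq $10, %rsp" sequence in identity_mapped builds a descriptor-table register image directly on the stack; kexec_debug_gdt stores its own limit in its first word, so the pushw reads the limit straight out of the table. A sketch of the 10-byte image, with illustrative struct and function names (only the layout is taken from the patch):

#include <stdint.h>

/* In-memory operand of LGDT/LIDT in long mode: 2-byte limit, 8-byte base. */
struct desc_ptr_image {
	uint16_t limit;		/* size of the table minus one */
	uint64_t base;		/* linear address of the table */
} __attribute__((packed));	/* 10 bytes, matching "addq $10, %rsp" */

static inline void load_gdt_image(uint64_t base, uint16_t limit)
{
	struct desc_ptr_image dtr = { .limit = limit, .base = base };

	/* equivalent of building the image on the stack and "lgdt (%rsp)" */
	__asm__ volatile("lgdt %0" : : "m" (dtr));
}

The IDT load works the same way with a hard-coded limit of 0xff, which exactly covers the 16 descriptors (16 entries * 16 bytes - 1) that the 0x100-byte kexec_debug_idt can hold.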
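
For readers unfamiliar with kexec's indirection pages: each 64-bit entry that swap_pages consumes is a page-aligned address with flag bits in the low bits, matching IND_DESTINATION (0x1), IND_INDIRECTION (0x2), IND_DONE (0x4) and IND_SOURCE (0x8) from include/linux/kexec.h. A C sketch of the walk, including the three-way swap performed when preserve_context is set (function and variable names are illustrative, not kernel API):

#include <stdint.h>
#include <string.h>

#define IND_DESTINATION	0x1
#define IND_INDIRECTION	0x2
#define IND_DONE	0x4
#define IND_SOURCE	0x8

#define KPAGE_SIZE	4096UL
#define KPAGE_MASK	(~(KPAGE_SIZE - 1))

static void walk_indirection_page(uint64_t *entry, void *swap_page,
				  int preserve_context)
{
	uint8_t *dest = 0;

	for (;;) {
		uint64_t e = *entry++;
		uint8_t *page = (uint8_t *)(e & KPAGE_MASK);

		if (e & IND_DESTINATION) {
			dest = page;			/* subsequent copies land here */
		} else if (e & IND_INDIRECTION) {
			entry = (uint64_t *)page;	/* continue in the next list page */
		} else if (e & IND_DONE) {
			return;
		} else if (e & IND_SOURCE) {
			if (preserve_context) {
				/* swap src and dest via the swap page, so the
				 * old kernel's pages survive for jumping back */
				memcpy(swap_page, page, KPAGE_SIZE);
				memcpy(page, dest, KPAGE_SIZE);
				memcpy(dest, swap_page, KPAGE_SIZE);
			} else {
				memcpy(dest, page, KPAGE_SIZE);
			}
			/* consecutive sources fill consecutive pages, like the
			 * implicit %rdi advance of "rep movsq" */
			dest += KPAGE_SIZE;
		}
		/* any other bit pattern is ignored, as in the assembly */
	}
}

Because swapping is an involution, the second swap_pages call on the return path (with %r11 forced to 1) restores both kernels' pages to their original locations.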
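
The pr_char_8250* helpers are standard polled 8250 output: spin on the Line Status Register until the transmitter is ready (the THRE bit, 0x20, which the patch calls XMTRDY), then write the character to the transmit register. The same logic in C, under the register layout used above (the inb/outb wrappers here are assumptions added to keep the sketch self-contained, not part of the patch):

#include <stdint.h>

#define TXR	0	/* transmit register (write) */
#define LSR	5	/* line status register */
#define XMTRDY	0x20	/* transmitter holding register empty */

static inline uint8_t inb(uint16_t port)
{
	uint8_t v;
	__asm__ volatile("inb %1, %0" : "=a" (v) : "Nd" (port));
	return v;
}

static inline void outb(uint8_t v, uint16_t port)
{
	__asm__ volatile("outb %0, %1" : : "a" (v), "Nd" (port));
}

/* port I/O variant, cf. pr_char_8250 */
static void uart_putc(uint16_t port, uint8_t c)
{
	while (!(inb(port + LSR) & XMTRDY))
		;	/* the assembly executes "pause" in this loop */
	outb(c, port + TXR);
}

/* MMIO variant, cf. pr_char_8250_mmio32: registers have a 32-bit
 * stride, so LSR lives at byte offset LSR * 4 */
static void uart_putc_mmio32(volatile uint8_t *base, uint8_t c)
{
	while (!(base[LSR * 4] & XMTRDY))
		;
	base[TXR] = c;
}

/* pr_qword equivalent: 16 hex digits, most significant nybble first
 * (the assembly rotates each nybble into place with "rolq $4") */
static void uart_put_qword(uint16_t port, uint64_t v)
{
	for (int i = 60; i >= 0; i -= 4) {
		uint8_t n = (v >> i) & 0xf;

		uart_putc(port, n < 10 ? '0' + n : 'a' + (n - 10));
	}
	uart_putc(port, '\n');
}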