Diffstat (limited to 'arch/x86/boot/compressed')
-rw-r--r--  arch/x86/boot/compressed/Makefile       |  28
-rw-r--r--  arch/x86/boot/compressed/efi_mixed.S    | 331
-rw-r--r--  arch/x86/boot/compressed/head_64.S      | 115
-rw-r--r--  arch/x86/boot/compressed/kaslr.c        |  47
-rw-r--r--  arch/x86/boot/compressed/la57toggle.S   | 112
-rw-r--r--  arch/x86/boot/compressed/misc.c         |  36
-rw-r--r--  arch/x86/boot/compressed/misc.h         |   2
-rw-r--r--  arch/x86/boot/compressed/pgtable_64.c   |   2
-rw-r--r--  arch/x86/boot/compressed/sev.c          | 266
-rw-r--r--  arch/x86/boot/compressed/vmlinux.lds.S  |   2
10 files changed, 314 insertions(+), 627 deletions(-)
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index e9522c6893be..fdbce022db55 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -17,15 +17,6 @@ # (see scripts/Makefile.lib size_append) # compressed vmlinux.bin.all + u32 size of vmlinux.bin.all -# Sanitizer runtimes are unavailable and cannot be linked for early boot code. -KASAN_SANITIZE := n -KCSAN_SANITIZE := n -KMSAN_SANITIZE := n -OBJECT_FILES_NON_STANDARD := y - -# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. -KCOV_INSTRUMENT := n - targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst @@ -34,6 +25,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ # avoid errors with '-march=i386', and future flags may depend on the target to # be valid. KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS) +KBUILD_CFLAGS += -std=gnu11 KBUILD_CFLAGS += -fno-strict-aliasing -fPIE KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING @@ -46,7 +38,6 @@ KBUILD_CFLAGS += -fno-stack-protector KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) KBUILD_CFLAGS += -Wno-pointer-sign -KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += -D__DISABLE_EXPORTS # Disable relocation relaxation in case the link is not PIE. @@ -59,8 +50,6 @@ KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ -GCOV_PROFILE := n -UBSAN_SANITIZE :=n KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE) KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info) @@ -108,6 +97,7 @@ ifdef CONFIG_X86_64 vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o vmlinux-objs-y += $(obj)/pgtable_64.o vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o + vmlinux-objs-y += $(obj)/la57toggle.o endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o @@ -115,10 +105,9 @@ vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o $(obj)/td vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o -vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o -vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a +vmlinux-libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a -$(obj)/vmlinux: $(vmlinux-objs-y) FORCE +$(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE $(call if_changed,ld) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S @@ -127,9 +116,12 @@ $(obj)/vmlinux.bin: vmlinux FORCE targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs -# vmlinux.relocs is created by the vmlinux postlink step. 
-$(obj)/vmlinux.relocs: vmlinux - @true +CMD_RELOCS = arch/x86/tools/relocs +quiet_cmd_relocs = RELOCS $@ + cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $< + +$(obj)/vmlinux.relocs: vmlinux.unstripped FORCE + $(call if_changed,relocs) vmlinux.bin.all-y := $(obj)/vmlinux.bin vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S deleted file mode 100644 index 719e939050cb..000000000000 --- a/arch/x86/boot/compressed/efi_mixed.S +++ /dev/null @@ -1,331 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming - * - * Early support for invoking 32-bit EFI services from a 64-bit kernel. - * - * Because this thunking occurs before ExitBootServices() we have to - * restore the firmware's 32-bit GDT and IDT before we make EFI service - * calls. - * - * On the plus side, we don't have to worry about mangling 64-bit - * addresses into 32-bits because we're executing with an identity - * mapped pagetable and haven't transitioned to 64-bit virtual addresses - * yet. - */ - -#include <linux/linkage.h> -#include <asm/msr.h> -#include <asm/page_types.h> -#include <asm/processor-flags.h> -#include <asm/segment.h> - - .code64 - .text -/* - * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode() - * is the first thing that runs after switching to long mode. Depending on - * whether the EFI handover protocol or the compat entry point was used to - * enter the kernel, it will either branch to the common 64-bit EFI stub - * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF - * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a - * struct bootparams pointer as the third argument, so the presence of such a - * pointer is used to disambiguate. - * - * +--------------+ - * +------------------+ +------------+ +------>| efi_pe_entry | - * | efi32_pe_entry |---->| | | +-----------+--+ - * +------------------+ | | +------+----------------+ | - * | startup_32 |---->| startup_64_mixed_mode | | - * +------------------+ | | +------+----------------+ | - * | efi32_stub_entry |---->| | | | - * +------------------+ +------------+ | | - * V | - * +------------+ +----------------+ | - * | startup_64 |<----| efi_stub_entry |<--------+ - * +------------+ +----------------+ - */ -SYM_FUNC_START(startup_64_mixed_mode) - lea efi32_boot_args(%rip), %rdx - mov 0(%rdx), %edi - mov 4(%rdx), %esi - - /* Switch to the firmware's stack */ - movl efi32_boot_sp(%rip), %esp - andl $~7, %esp - -#ifdef CONFIG_EFI_HANDOVER_PROTOCOL - mov 8(%rdx), %edx // saved bootparams pointer - test %edx, %edx - jnz efi_stub_entry -#endif - /* - * efi_pe_entry uses MS calling convention, which requires 32 bytes of - * shadow space on the stack even if all arguments are passed in - * registers. We also need an additional 8 bytes for the space that - * would be occupied by the return address, and this also results in - * the correct stack alignment for entry. 
- */ - sub $40, %rsp - mov %rdi, %rcx // MS calling convention - mov %rsi, %rdx - jmp efi_pe_entry -SYM_FUNC_END(startup_64_mixed_mode) - -SYM_FUNC_START(__efi64_thunk) - push %rbp - push %rbx - - movl %ds, %eax - push %rax - movl %es, %eax - push %rax - movl %ss, %eax - push %rax - - /* Copy args passed on stack */ - movq 0x30(%rsp), %rbp - movq 0x38(%rsp), %rbx - movq 0x40(%rsp), %rax - - /* - * Convert x86-64 ABI params to i386 ABI - */ - subq $64, %rsp - movl %esi, 0x0(%rsp) - movl %edx, 0x4(%rsp) - movl %ecx, 0x8(%rsp) - movl %r8d, 0xc(%rsp) - movl %r9d, 0x10(%rsp) - movl %ebp, 0x14(%rsp) - movl %ebx, 0x18(%rsp) - movl %eax, 0x1c(%rsp) - - leaq 0x20(%rsp), %rbx - sgdt (%rbx) - sidt 16(%rbx) - - leaq 1f(%rip), %rbp - - /* - * Switch to IDT and GDT with 32-bit segments. These are the firmware - * GDT and IDT that were installed when the kernel started executing. - * The pointers were saved by the efi32_entry() routine below. - * - * Pass the saved DS selector to the 32-bit code, and use far return to - * restore the saved CS selector. - */ - lidt efi32_boot_idt(%rip) - lgdt efi32_boot_gdt(%rip) - - movzwl efi32_boot_ds(%rip), %edx - movzwq efi32_boot_cs(%rip), %rax - pushq %rax - leaq efi_enter32(%rip), %rax - pushq %rax - lretq - -1: addq $64, %rsp - movq %rdi, %rax - - pop %rbx - movl %ebx, %ss - pop %rbx - movl %ebx, %es - pop %rbx - movl %ebx, %ds - /* Clear out 32-bit selector from FS and GS */ - xorl %ebx, %ebx - movl %ebx, %fs - movl %ebx, %gs - - pop %rbx - pop %rbp - RET -SYM_FUNC_END(__efi64_thunk) - - .code32 -#ifdef CONFIG_EFI_HANDOVER_PROTOCOL -SYM_FUNC_START(efi32_stub_entry) - call 1f -1: popl %ecx - - /* Clear BSS */ - xorl %eax, %eax - leal (_bss - 1b)(%ecx), %edi - leal (_ebss - 1b)(%ecx), %ecx - subl %edi, %ecx - shrl $2, %ecx - cld - rep stosl - - add $0x4, %esp /* Discard return address */ - popl %ecx - popl %edx - popl %esi - jmp efi32_entry -SYM_FUNC_END(efi32_stub_entry) -#endif - -/* - * EFI service pointer must be in %edi. - * - * The stack should represent the 32-bit calling convention. - */ -SYM_FUNC_START_LOCAL(efi_enter32) - /* Load firmware selector into data and stack segment registers */ - movl %edx, %ds - movl %edx, %es - movl %edx, %fs - movl %edx, %gs - movl %edx, %ss - - /* Reload pgtables */ - movl %cr3, %eax - movl %eax, %cr3 - - /* Disable paging */ - movl %cr0, %eax - btrl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - - /* Disable long mode via EFER */ - movl $MSR_EFER, %ecx - rdmsr - btrl $_EFER_LME, %eax - wrmsr - - call *%edi - - /* We must preserve return value */ - movl %eax, %edi - - /* - * Some firmware will return with interrupts enabled. Be sure to - * disable them before we switch GDTs and IDTs. - */ - cli - - lidtl 16(%ebx) - lgdtl (%ebx) - - movl %cr4, %eax - btsl $(X86_CR4_PAE_BIT), %eax - movl %eax, %cr4 - - movl %cr3, %eax - movl %eax, %cr3 - - movl $MSR_EFER, %ecx - rdmsr - btsl $_EFER_LME, %eax - wrmsr - - xorl %eax, %eax - lldt %ax - - pushl $__KERNEL_CS - pushl %ebp - - /* Enable paging */ - movl %cr0, %eax - btsl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - lret -SYM_FUNC_END(efi_enter32) - -/* - * This is the common EFI stub entry point for mixed mode. - * - * Arguments: %ecx image handle - * %edx EFI system table pointer - * %esi struct bootparams pointer (or NULL when not using - * the EFI handover protocol) - * - * Since this is the point of no return for ordinary execution, no registers - * are considered live except for the function parameters. 
[Note that the EFI - * stub may still exit and return to the firmware using the Exit() EFI boot - * service.] - */ -SYM_FUNC_START_LOCAL(efi32_entry) - call 1f -1: pop %ebx - - /* Save firmware GDTR and code/data selectors */ - sgdtl (efi32_boot_gdt - 1b)(%ebx) - movw %cs, (efi32_boot_cs - 1b)(%ebx) - movw %ds, (efi32_boot_ds - 1b)(%ebx) - - /* Store firmware IDT descriptor */ - sidtl (efi32_boot_idt - 1b)(%ebx) - - /* Store firmware stack pointer */ - movl %esp, (efi32_boot_sp - 1b)(%ebx) - - /* Store boot arguments */ - leal (efi32_boot_args - 1b)(%ebx), %ebx - movl %ecx, 0(%ebx) - movl %edx, 4(%ebx) - movl %esi, 8(%ebx) - movb $0x0, 12(%ebx) // efi_is64 - - /* Disable paging */ - movl %cr0, %eax - btrl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - - jmp startup_32 -SYM_FUNC_END(efi32_entry) - -/* - * efi_status_t efi32_pe_entry(efi_handle_t image_handle, - * efi_system_table_32_t *sys_table) - */ -SYM_FUNC_START(efi32_pe_entry) - pushl %ebp - movl %esp, %ebp - pushl %ebx // save callee-save registers - pushl %edi - - call verify_cpu // check for long mode support - testl %eax, %eax - movl $0x80000003, %eax // EFI_UNSUPPORTED - jnz 2f - - movl 8(%ebp), %ecx // image_handle - movl 12(%ebp), %edx // sys_table - xorl %esi, %esi - jmp efi32_entry // pass %ecx, %edx, %esi - // no other registers remain live - -2: popl %edi // restore callee-save registers - popl %ebx - leave - RET -SYM_FUNC_END(efi32_pe_entry) - -#ifdef CONFIG_EFI_HANDOVER_PROTOCOL - .org efi32_stub_entry + 0x200 - .code64 -SYM_FUNC_START_NOALIGN(efi64_stub_entry) - jmp efi_handover_entry -SYM_FUNC_END(efi64_stub_entry) -#endif - - .data - .balign 8 -SYM_DATA_START_LOCAL(efi32_boot_gdt) - .word 0 - .quad 0 -SYM_DATA_END(efi32_boot_gdt) - -SYM_DATA_START_LOCAL(efi32_boot_idt) - .word 0 - .quad 0 -SYM_DATA_END(efi32_boot_idt) - -SYM_DATA_LOCAL(efi32_boot_cs, .word 0) -SYM_DATA_LOCAL(efi32_boot_ds, .word 0) -SYM_DATA_LOCAL(efi32_boot_sp, .long 0) -SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) -SYM_DATA(efi_is64, .byte 1) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index bf4a10a5794f..eafd4f185e77 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -263,13 +263,6 @@ SYM_FUNC_START(startup_32) * used to perform that far jump. */ leal rva(startup_64)(%ebp), %eax -#ifdef CONFIG_EFI_MIXED - cmpb $1, rva(efi_is64)(%ebp) - je 1f - leal rva(startup_64_mixed_mode)(%ebp), %eax -1: -#endif - pushl $__KERNEL_CS pushl %eax @@ -398,6 +391,11 @@ SYM_CODE_START(startup_64) call sev_enable #endif + /* Preserve only the CR4 bits that must be preserved, and clear the rest */ + movq %cr4, %rax + andl $(X86_CR4_PAE | X86_CR4_MCE | X86_CR4_LA57), %eax + movq %rax, %cr4 + /* * configure_5level_paging() updates the number of paging levels using * a trampoline in 32-bit addressable memory if the current number does @@ -478,110 +476,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) jmp *%rax SYM_FUNC_END(.Lrelocated) -/* - * This is the 32-bit trampoline that will be copied over to low memory. It - * will be called using the ordinary 64-bit calling convention from code - * running in 64-bit mode. - * - * Return address is at the top of the stack (might be above 4G). - * The first argument (EDI) contains the address of the temporary PGD level - * page table in 32-bit addressable memory which will be programmed into - * register CR3. 
- */ - .section ".rodata", "a", @progbits -SYM_CODE_START(trampoline_32bit_src) - /* - * Preserve callee save 64-bit registers on the stack: this is - * necessary because the architecture does not guarantee that GPRs will - * retain their full 64-bit values across a 32-bit mode switch. - */ - pushq %r15 - pushq %r14 - pushq %r13 - pushq %r12 - pushq %rbp - pushq %rbx - - /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */ - movq %rsp, %rbx - shrq $32, %rbx - - /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ - pushq $__KERNEL32_CS - leaq 0f(%rip), %rax - pushq %rax - lretq - - /* - * The 32-bit code below will do a far jump back to long mode and end - * up here after reconfiguring the number of paging levels. First, the - * stack pointer needs to be restored to its full 64-bit value before - * the callee save register contents can be popped from the stack. - */ -.Lret: - shlq $32, %rbx - orq %rbx, %rsp - - /* Restore the preserved 64-bit registers */ - popq %rbx - popq %rbp - popq %r12 - popq %r13 - popq %r14 - popq %r15 - retq - .code32 -0: - /* Disable paging */ - movl %cr0, %eax - btrl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - - /* Point CR3 to the trampoline's new top level page table */ - movl %edi, %cr3 - - /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ - movl $MSR_EFER, %ecx - rdmsr - btsl $_EFER_LME, %eax - /* Avoid writing EFER if no change was made (for TDX guest) */ - jc 1f - wrmsr -1: - /* Toggle CR4.LA57 */ - movl %cr4, %eax - btcl $X86_CR4_LA57_BIT, %eax - movl %eax, %cr4 - - /* Enable paging again. */ - movl %cr0, %eax - btsl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - - /* - * Return to the 64-bit calling code using LJMP rather than LRET, to - * avoid the need for a 32-bit addressable stack. The destination - * address will be adjusted after the template code is copied into a - * 32-bit addressable buffer. - */ -.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src) -SYM_CODE_END(trampoline_32bit_src) - -/* - * This symbol is placed right after trampoline_32bit_src() so its address can - * be used to infer the size of the trampoline code. - */ -SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src) - - /* - * The trampoline code has a size limit. - * Make sure we fail to compile if the trampoline code grows - * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes. 
- */ - .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE - - .text SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 1: diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index dec961c6d16a..f03d59ea6e40 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -25,10 +25,6 @@ #include "efi.h" #include <generated/compile.h> -#include <linux/module.h> -#include <linux/uts.h> -#include <linux/utsname.h> -#include <linux/ctype.h> #include <generated/utsversion.h> #include <generated/utsrelease.h> @@ -119,13 +115,8 @@ char *skip_spaces(const char *str) #include "../../../../lib/ctype.c" #include "../../../../lib/cmdline.c" -enum parse_mode { - PARSE_MEMMAP, - PARSE_EFI, -}; - static int -parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode) +parse_memmap(char *p, u64 *start, u64 *size) { char *oldp; @@ -148,29 +139,11 @@ parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode) *start = memparse(p + 1, &p); return 0; case '@': - if (mode == PARSE_MEMMAP) { - /* - * memmap=nn@ss specifies usable region, should - * be skipped - */ - *size = 0; - } else { - u64 flags; - - /* - * efi_fake_mem=nn@ss:attr the attr specifies - * flags that might imply a soft-reservation. - */ - *start = memparse(p + 1, &p); - if (p && *p == ':') { - p++; - if (kstrtoull(p, 0, &flags) < 0) - *size = 0; - else if (flags & EFI_MEMORY_SP) - return 0; - } - *size = 0; - } + /* + * memmap=nn@ss specifies usable region, should + * be skipped + */ + *size = 0; fallthrough; default: /* @@ -185,7 +158,7 @@ parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode) return -EINVAL; } -static void mem_avoid_memmap(enum parse_mode mode, char *str) +static void mem_avoid_memmap(char *str) { static int i; @@ -200,7 +173,7 @@ static void mem_avoid_memmap(enum parse_mode mode, char *str) if (k) *k++ = 0; - rc = parse_memmap(str, &start, &size, mode); + rc = parse_memmap(str, &start, &size); if (rc < 0) break; str = k; @@ -281,7 +254,7 @@ static void handle_mem_options(void) break; if (!strcmp(param, "memmap")) { - mem_avoid_memmap(PARSE_MEMMAP, val); + mem_avoid_memmap(val); } else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) { parse_gb_huge_pages(param, val); } else if (!strcmp(param, "mem")) { @@ -295,8 +268,6 @@ static void handle_mem_options(void) if (mem_size < mem_limit) mem_limit = mem_size; - } else if (!strcmp(param, "efi_fake_mem")) { - mem_avoid_memmap(PARSE_EFI, val); } } diff --git a/arch/x86/boot/compressed/la57toggle.S b/arch/x86/boot/compressed/la57toggle.S new file mode 100644 index 000000000000..9ee002387eb1 --- /dev/null +++ b/arch/x86/boot/compressed/la57toggle.S @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/linkage.h> +#include <asm/segment.h> +#include <asm/boot.h> +#include <asm/msr.h> +#include <asm/processor-flags.h> +#include "pgtable.h" + +/* + * This is the 32-bit trampoline that will be copied over to low memory. It + * will be called using the ordinary 64-bit calling convention from code + * running in 64-bit mode. + * + * Return address is at the top of the stack (might be above 4G). + * The first argument (EDI) contains the address of the temporary PGD level + * page table in 32-bit addressable memory which will be programmed into + * register CR3. 
+ */ + + .section ".rodata", "a", @progbits +SYM_CODE_START(trampoline_32bit_src) + /* + * Preserve callee save 64-bit registers on the stack: this is + * necessary because the architecture does not guarantee that GPRs will + * retain their full 64-bit values across a 32-bit mode switch. + */ + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbp + pushq %rbx + + /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */ + movq %rsp, %rbx + shrq $32, %rbx + + /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ + pushq $__KERNEL32_CS + leaq 0f(%rip), %rax + pushq %rax + lretq + + /* + * The 32-bit code below will do a far jump back to long mode and end + * up here after reconfiguring the number of paging levels. First, the + * stack pointer needs to be restored to its full 64-bit value before + * the callee save register contents can be popped from the stack. + */ +.Lret: + shlq $32, %rbx + orq %rbx, %rsp + + /* Restore the preserved 64-bit registers */ + popq %rbx + popq %rbp + popq %r12 + popq %r13 + popq %r14 + popq %r15 + retq + + .code32 +0: + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + /* Point CR3 to the trampoline's new top level page table */ + movl %edi, %cr3 + + /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + /* Avoid writing EFER if no change was made (for TDX guest) */ + jc 1f + wrmsr +1: + /* Toggle CR4.LA57 */ + movl %cr4, %eax + btcl $X86_CR4_LA57_BIT, %eax + movl %eax, %cr4 + + /* Enable paging again. */ + movl %cr0, %eax + btsl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + /* + * Return to the 64-bit calling code using LJMP rather than LRET, to + * avoid the need for a 32-bit addressable stack. The destination + * address will be adjusted after the template code is copied into a + * 32-bit addressable buffer. + */ +.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src) +SYM_CODE_END(trampoline_32bit_src) + +/* + * This symbol is placed right after trampoline_32bit_src() so its address can + * be used to infer the size of the trampoline code. + */ +SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src) + + /* + * The trampoline code has a size limit. + * Make sure we fail to compile if the trampoline code grows + * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes. + */ + .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index b70e4a21c15f..1cdcd4aaf395 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -235,7 +235,7 @@ static void handle_relocations(void *output, unsigned long output_len, /* * Process relocations: 32 bit relocations first then 64 bit after. - * Three sets of binary relocations are added to the end of the kernel + * Two sets of binary relocations are added to the end of the kernel * before compression. Each relocation table entry is the kernel * address of the location which needs to be updated stored as a * 32-bit value which is sign extended to 64 bits. @@ -245,8 +245,6 @@ static void handle_relocations(void *output, unsigned long output_len, * kernel bits... 
* 0 - zero terminator for 64 bit relocations * 64 bit relocation repeated - * 0 - zero terminator for inverse 32 bit relocations - * 32 bit inverse relocation repeated * 0 - zero terminator for 32 bit relocations * 32 bit relocation repeated * @@ -263,16 +261,6 @@ static void handle_relocations(void *output, unsigned long output_len, *(uint32_t *)ptr += delta; } #ifdef CONFIG_X86_64 - while (*--reloc) { - long extended = *reloc; - extended += map; - - ptr = (unsigned long)extended; - if (ptr < min_addr || ptr > max_addr) - error("inverse 32-bit relocation outside of kernel!\n"); - - *(int32_t *)ptr -= delta; - } for (reloc--; *reloc; reloc--) { long extended = *reloc; extended += map; @@ -385,6 +373,19 @@ static void parse_mem_encrypt(struct setup_header *hdr) hdr->xloadflags |= XLF_MEM_ENCRYPTION; } +static void early_sev_detect(void) +{ + /* + * Accessing video memory causes guest termination because + * the boot stage2 #VC handler of SEV-ES/SNP guests does not + * support MMIO handling and kexec -c adds screen_info to the + * boot parameters passed to the kexec kernel, which causes + * console output to be dumped to both video and serial. + */ + if (sev_status & MSR_AMD64_SEV_ES_ENABLED) + lines = cols = 0; +} + /* * The compressed kernel image (ZO), has been moved so that its position * is against the end of the buffer used to hold the uncompressed kernel @@ -440,6 +441,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) */ early_tdx_detect(); + early_sev_detect(); + console_init(); /* @@ -511,7 +514,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) if (init_unaccepted_memory()) { debug_putstr("Accepting memory... "); - accept_memory(__pa(output), __pa(output) + needed_size); + accept_memory(__pa(output), needed_size); } entry_offset = decompress_kernel(output, virt_addr, error); @@ -531,8 +534,3 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) return output + entry_offset; } - -void __fortify_panic(const u8 reason, size_t avail, size_t size) -{ - error("detected buffer overflow"); -} diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index b353a7be380c..dd8d1a85f671 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -256,6 +256,6 @@ static inline bool init_unaccepted_memory(void) { return false; } /* Defined in EFI stub */ extern struct efi_unaccepted_memory *unaccepted_table; -void accept_memory(phys_addr_t start, phys_addr_t end); +void accept_memory(phys_addr_t start, unsigned long size); #endif /* BOOT_COMPRESSED_MISC_H */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index c882e1f67af0..d8c5de40669d 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "misc.h" #include <asm/bootparam.h> +#include <asm/bootparam_utils.h> #include <asm/e820/types.h> #include <asm/processor.h> #include "pgtable.h" @@ -107,6 +108,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). 
*/ + sanitize_boot_params(bp); boot_params_ptr = bp; /* diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index ec71846d28c9..bb55934c1cee 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -127,7 +127,35 @@ static bool fault_in_kernel_space(unsigned long address) #include "../../lib/insn.c" /* Include code for early handlers */ -#include "../../kernel/sev-shared.c" +#include "../../coco/sev/shared.c" + +static struct svsm_ca *svsm_get_caa(void) +{ + return boot_svsm_caa; +} + +static u64 svsm_get_caa_pa(void) +{ + return boot_svsm_caa_pa; +} + +static int svsm_perform_call_protocol(struct svsm_call *call) +{ + struct ghcb *ghcb; + int ret; + + if (boot_ghcb) + ghcb = boot_ghcb; + else + ghcb = NULL; + + do { + ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call) + : svsm_perform_msr_protocol(call); + } while (ret == -EAGAIN); + + return ret; +} bool sev_snp_enabled(void) { @@ -145,8 +173,8 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) * If private -> shared then invalidate the page before requesting the * state change in the RMP table. */ - if (op == SNP_PAGE_STATE_SHARED && pvalidate(paddr, RMP_PG_SIZE_4K, 0)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + if (op == SNP_PAGE_STATE_SHARED) + pvalidate_4k_page(paddr, paddr, false); /* Issue VMGEXIT to change the page state in RMP table. */ sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); @@ -161,8 +189,8 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) * Now that page state is changed in the RMP table, validate it so that it is * consistent with the RMP entry. */ - if (op == SNP_PAGE_STATE_PRIVATE && pvalidate(paddr, RMP_PG_SIZE_4K, 1)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + if (op == SNP_PAGE_STATE_PRIVATE) + pvalidate_4k_page(paddr, paddr, true); } void snp_set_page_private(unsigned long paddr) @@ -256,6 +284,16 @@ void sev_es_shutdown_ghcb(void) error("SEV-ES CPU Features missing."); /* + * This denotes whether to use the GHCB MSR protocol or the GHCB + * shared page to perform a GHCB request. Since the GHCB page is + * being changed to encrypted, it can't be used to perform GHCB + * requests. Clear the boot_ghcb variable so that the GHCB MSR + * protocol is used to change the GHCB page over to an encrypted + * page. + */ + boot_ghcb = NULL; + + /* * GHCB Page must be flushed from the cache and mapped encrypted again. * Otherwise the running kernel will see strange cache effects when * trying to use that page. @@ -335,26 +373,6 @@ finish: sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); } -static void enforce_vmpl0(void) -{ - u64 attrs; - int err; - - /* - * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically - * higher) privilege level. Here, clear the VMPL1 permission mask of the - * GHCB page. If the guest is not running at VMPL0, this will fail. - * - * If the guest is running at VMPL0, it will succeed. Even if that operation - * modifies permission bits, it is still ok to do so currently because Linux - * SNP guests are supported only on VMPL0 so VMPL1 or higher permission masks - * changing is a don't-care. - */ - attrs = 1; - if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, attrs)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0); -} - /* * SNP_FEATURES_IMPL_REQ is the mask of SNP features that will need * guest side implementation for proper functioning of the guest. 
If any @@ -383,7 +401,8 @@ static void enforce_vmpl0(void) * by the guest kernel. As and when a new feature is implemented in the * guest kernel, a corresponding bit should be added to the mask. */ -#define SNP_FEATURES_PRESENT MSR_AMD64_SNP_DEBUG_SWAP +#define SNP_FEATURES_PRESENT (MSR_AMD64_SNP_DEBUG_SWAP | \ + MSR_AMD64_SNP_SECURE_TSC) u64 snp_get_unsupported_features(u64 status) { @@ -413,6 +432,92 @@ void snp_check_features(void) } } +/* Search for Confidential Computing blob in the EFI config table. */ +static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp) +{ + unsigned long cfg_table_pa; + unsigned int cfg_table_len; + int ret; + + ret = efi_get_conf_table(bp, &cfg_table_pa, &cfg_table_len); + if (ret) + return NULL; + + return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, cfg_table_pa, + cfg_table_len, + EFI_CC_BLOB_GUID); +} + +/* + * Initial set up of SNP relies on information provided by the + * Confidential Computing blob, which can be passed to the boot kernel + * by firmware/bootloader in the following ways: + * + * - via an entry in the EFI config table + * - via a setup_data structure, as defined by the Linux Boot Protocol + * + * Scan for the blob in that order. + */ +static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) +{ + struct cc_blob_sev_info *cc_info; + + cc_info = find_cc_blob_efi(bp); + if (cc_info) + goto found_cc_info; + + cc_info = find_cc_blob_setup_data(bp); + if (!cc_info) + return NULL; + +found_cc_info: + if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + + return cc_info; +} + +/* + * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks + * will verify the SNP CPUID/MSR bits. + */ +static bool early_snp_init(struct boot_params *bp) +{ + struct cc_blob_sev_info *cc_info; + + if (!bp) + return false; + + cc_info = find_cc_blob(bp); + if (!cc_info) + return false; + + /* + * If a SNP-specific Confidential Computing blob is present, then + * firmware/bootloader have indicated SNP support. Verifying this + * involves CPUID checks which will be more reliable if the SNP + * CPUID table is used. See comments over snp_setup_cpuid_table() for + * more details. + */ + setup_cpuid_table(cc_info); + + /* + * Record the SVSM Calling Area (CA) address if the guest is not + * running at VMPL0. The CA will be used to communicate with the + * SVSM and request its services. + */ + svsm_setup_ca(cc_info); + + /* + * Pass run-time kernel a pointer to CC info via boot_params so EFI + * config table doesn't need to be searched again during early startup + * phase. + */ + bp->cc_blob_address = (u32)(unsigned long)cc_info; + + return true; +} + /* * sev_check_cpu_support - Check for SEV support in the CPU capabilities * @@ -463,7 +568,7 @@ void sev_enable(struct boot_params *bp) bp->cc_blob_address = 0; /* - * Do an initial SEV capability check before snp_init() which + * Do an initial SEV capability check before early_snp_init() which * loads the CPUID page and the same checks afterwards are done * without the hypervisor and are trustworthy. * @@ -478,7 +583,7 @@ void sev_enable(struct boot_params *bp) * Setup/preliminary detection of SNP. This will be sanity-checked * against CPUID/MSR values later. */ - snp = snp_init(bp); + snp = early_snp_init(bp); /* Now repeat the checks with the SNP CPUID table. */ @@ -506,10 +611,32 @@ void sev_enable(struct boot_params *bp) * features. 
*/ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { - if (!(get_hv_features() & GHCB_HV_FT_SNP)) + u64 hv_features; + int ret; + + hv_features = get_hv_features(); + if (!(hv_features & GHCB_HV_FT_SNP)) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); - enforce_vmpl0(); + /* + * Enforce running at VMPL0 or with an SVSM. + * + * Use RMPADJUST (see the rmpadjust() function for a description of + * what the instruction does) to update the VMPL1 permissions of a + * page. If the guest is running at VMPL0, this will succeed. If the + * guest is running at any other VMPL, this will fail. Linux SNP guests + * only ever run at a single VMPL level so permission mask changes of a + * lesser-privileged VMPL are a don't-care. + */ + ret = rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, 1); + + /* + * Running at VMPL0 is not required if an SVSM is present and the hypervisor + * supports the required SVSM GHCB events. + */ + if (ret && + !(snp_vmpl && (hv_features & GHCB_HV_FT_SNP_MULTI_VMPL))) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0); } if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) @@ -535,85 +662,6 @@ u64 sev_get_status(void) return m.q; } -/* Search for Confidential Computing blob in the EFI config table. */ -static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp) -{ - unsigned long cfg_table_pa; - unsigned int cfg_table_len; - int ret; - - ret = efi_get_conf_table(bp, &cfg_table_pa, &cfg_table_len); - if (ret) - return NULL; - - return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, cfg_table_pa, - cfg_table_len, - EFI_CC_BLOB_GUID); -} - -/* - * Initial set up of SNP relies on information provided by the - * Confidential Computing blob, which can be passed to the boot kernel - * by firmware/bootloader in the following ways: - * - * - via an entry in the EFI config table - * - via a setup_data structure, as defined by the Linux Boot Protocol - * - * Scan for the blob in that order. - */ -static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) -{ - struct cc_blob_sev_info *cc_info; - - cc_info = find_cc_blob_efi(bp); - if (cc_info) - goto found_cc_info; - - cc_info = find_cc_blob_setup_data(bp); - if (!cc_info) - return NULL; - -found_cc_info: - if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); - - return cc_info; -} - -/* - * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks - * will verify the SNP CPUID/MSR bits. - */ -bool snp_init(struct boot_params *bp) -{ - struct cc_blob_sev_info *cc_info; - - if (!bp) - return false; - - cc_info = find_cc_blob(bp); - if (!cc_info) - return false; - - /* - * If a SNP-specific Confidential Computing blob is present, then - * firmware/bootloader have indicated SNP support. Verifying this - * involves CPUID checks which will be more reliable if the SNP - * CPUID table is used. See comments over snp_setup_cpuid_table() for - * more details. - */ - setup_cpuid_table(cc_info); - - /* - * Pass run-time kernel a pointer to CC info via boot_params so EFI - * config table doesn't need to be searched again during early startup - * phase. 
- */ - bp->cc_blob_address = (u32)(unsigned long)cc_info; - - return true; -} - void sev_prep_identity_maps(unsigned long top_level_pgt) { /* diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index 083ec6d7722a..3b2bc61c9408 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -48,7 +48,7 @@ SECTIONS *(.data) *(.data.*) - /* Add 4 bytes of extra space for a CRC-32 checksum */ + /* Add 4 bytes of extra space for the obsolete CRC-32 checksum */ . = ALIGN(. + 4, 0x200); _edata = . ; } |