summary | refs | log | tree | commit | diff
path: root/arch/x86/boot/compressed
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/boot/compressed')
-rw-r--r-- arch/x86/boot/compressed/Makefile 28
-rw-r--r-- arch/x86/boot/compressed/efi_mixed.S 331
-rw-r--r-- arch/x86/boot/compressed/head_64.S 115
-rw-r--r-- arch/x86/boot/compressed/kaslr.c 47
-rw-r--r-- arch/x86/boot/compressed/la57toggle.S 112
-rw-r--r-- arch/x86/boot/compressed/misc.c 36
-rw-r--r-- arch/x86/boot/compressed/misc.h 2
-rw-r--r-- arch/x86/boot/compressed/pgtable_64.c 2
-rw-r--r-- arch/x86/boot/compressed/sev.c 266
-rw-r--r-- arch/x86/boot/compressed/vmlinux.lds.S 2
10 files changed, 314 insertions, 627 deletions
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index e9522c6893be..fdbce022db55 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -17,15 +17,6 @@
# (see scripts/Makefile.lib size_append)
# compressed vmlinux.bin.all + u32 size of vmlinux.bin.all
-# Sanitizer runtimes are unavailable and cannot be linked for early boot code.
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-KMSAN_SANITIZE := n
-OBJECT_FILES_NON_STANDARD := y
-
-# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
-KCOV_INSTRUMENT := n
-
targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst
@@ -34,6 +25,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
# avoid errors with '-march=i386', and future flags may depend on the target to
# be valid.
KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS)
+KBUILD_CFLAGS += -std=gnu11
KBUILD_CFLAGS += -fno-strict-aliasing -fPIE
KBUILD_CFLAGS += -Wundef
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
@@ -46,7 +38,6 @@ KBUILD_CFLAGS += -fno-stack-protector
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
KBUILD_CFLAGS += -Wno-pointer-sign
-KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
KBUILD_CFLAGS += -D__DISABLE_EXPORTS
# Disable relocation relaxation in case the link is not PIE.
@@ -59,8 +50,6 @@ KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h
CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
-GCOV_PROFILE := n
-UBSAN_SANITIZE :=n
KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info)
@@ -108,6 +97,7 @@ ifdef CONFIG_X86_64
vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o
vmlinux-objs-y += $(obj)/pgtable_64.o
vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o
+ vmlinux-objs-y += $(obj)/la57toggle.o
endif
vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
@@ -115,10 +105,9 @@ vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o $(obj)/td
vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o
vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
-vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o
-vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
+vmlinux-libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
-$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
+$(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE
$(call if_changed,ld)
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
@@ -127,9 +116,12 @@ $(obj)/vmlinux.bin: vmlinux FORCE
targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs
-# vmlinux.relocs is created by the vmlinux postlink step.
-$(obj)/vmlinux.relocs: vmlinux
- @true
+CMD_RELOCS = arch/x86/tools/relocs
+quiet_cmd_relocs = RELOCS $@
+ cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $<
+
+$(obj)/vmlinux.relocs: vmlinux.unstripped FORCE
+ $(call if_changed,relocs)
vmlinux.bin.all-y := $(obj)/vmlinux.bin
vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S
deleted file mode 100644
index 719e939050cb..000000000000
--- a/arch/x86/boot/compressed/efi_mixed.S
+++ /dev/null
@@ -1,331 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
- *
- * Early support for invoking 32-bit EFI services from a 64-bit kernel.
- *
- * Because this thunking occurs before ExitBootServices() we have to
- * restore the firmware's 32-bit GDT and IDT before we make EFI service
- * calls.
- *
- * On the plus side, we don't have to worry about mangling 64-bit
- * addresses into 32-bits because we're executing with an identity
- * mapped pagetable and haven't transitioned to 64-bit virtual addresses
- * yet.
- */
-
-#include <linux/linkage.h>
-#include <asm/msr.h>
-#include <asm/page_types.h>
-#include <asm/processor-flags.h>
-#include <asm/segment.h>
-
- .code64
- .text
-/*
- * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode()
- * is the first thing that runs after switching to long mode. Depending on
- * whether the EFI handover protocol or the compat entry point was used to
- * enter the kernel, it will either branch to the common 64-bit EFI stub
- * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF
- * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a
- * struct bootparams pointer as the third argument, so the presence of such a
- * pointer is used to disambiguate.
- *
- * +--------------+
- * +------------------+ +------------+ +------>| efi_pe_entry |
- * | efi32_pe_entry |---->| | | +-----------+--+
- * +------------------+ | | +------+----------------+ |
- * | startup_32 |---->| startup_64_mixed_mode | |
- * +------------------+ | | +------+----------------+ |
- * | efi32_stub_entry |---->| | | |
- * +------------------+ +------------+ | |
- * V |
- * +------------+ +----------------+ |
- * | startup_64 |<----| efi_stub_entry |<--------+
- * +------------+ +----------------+
- */
-SYM_FUNC_START(startup_64_mixed_mode)
- lea efi32_boot_args(%rip), %rdx
- mov 0(%rdx), %edi
- mov 4(%rdx), %esi
-
- /* Switch to the firmware's stack */
- movl efi32_boot_sp(%rip), %esp
- andl $~7, %esp
-
-#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
- mov 8(%rdx), %edx // saved bootparams pointer
- test %edx, %edx
- jnz efi_stub_entry
-#endif
- /*
- * efi_pe_entry uses MS calling convention, which requires 32 bytes of
- * shadow space on the stack even if all arguments are passed in
- * registers. We also need an additional 8 bytes for the space that
- * would be occupied by the return address, and this also results in
- * the correct stack alignment for entry.
- */
- sub $40, %rsp
- mov %rdi, %rcx // MS calling convention
- mov %rsi, %rdx
- jmp efi_pe_entry
-SYM_FUNC_END(startup_64_mixed_mode)
-
-SYM_FUNC_START(__efi64_thunk)
- push %rbp
- push %rbx
-
- movl %ds, %eax
- push %rax
- movl %es, %eax
- push %rax
- movl %ss, %eax
- push %rax
-
- /* Copy args passed on stack */
- movq 0x30(%rsp), %rbp
- movq 0x38(%rsp), %rbx
- movq 0x40(%rsp), %rax
-
- /*
- * Convert x86-64 ABI params to i386 ABI
- */
- subq $64, %rsp
- movl %esi, 0x0(%rsp)
- movl %edx, 0x4(%rsp)
- movl %ecx, 0x8(%rsp)
- movl %r8d, 0xc(%rsp)
- movl %r9d, 0x10(%rsp)
- movl %ebp, 0x14(%rsp)
- movl %ebx, 0x18(%rsp)
- movl %eax, 0x1c(%rsp)
-
- leaq 0x20(%rsp), %rbx
- sgdt (%rbx)
- sidt 16(%rbx)
-
- leaq 1f(%rip), %rbp
-
- /*
- * Switch to IDT and GDT with 32-bit segments. These are the firmware
- * GDT and IDT that were installed when the kernel started executing.
- * The pointers were saved by the efi32_entry() routine below.
- *
- * Pass the saved DS selector to the 32-bit code, and use far return to
- * restore the saved CS selector.
- */
- lidt efi32_boot_idt(%rip)
- lgdt efi32_boot_gdt(%rip)
-
- movzwl efi32_boot_ds(%rip), %edx
- movzwq efi32_boot_cs(%rip), %rax
- pushq %rax
- leaq efi_enter32(%rip), %rax
- pushq %rax
- lretq
-
-1: addq $64, %rsp
- movq %rdi, %rax
-
- pop %rbx
- movl %ebx, %ss
- pop %rbx
- movl %ebx, %es
- pop %rbx
- movl %ebx, %ds
- /* Clear out 32-bit selector from FS and GS */
- xorl %ebx, %ebx
- movl %ebx, %fs
- movl %ebx, %gs
-
- pop %rbx
- pop %rbp
- RET
-SYM_FUNC_END(__efi64_thunk)
-
- .code32
-#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
-SYM_FUNC_START(efi32_stub_entry)
- call 1f
-1: popl %ecx
-
- /* Clear BSS */
- xorl %eax, %eax
- leal (_bss - 1b)(%ecx), %edi
- leal (_ebss - 1b)(%ecx), %ecx
- subl %edi, %ecx
- shrl $2, %ecx
- cld
- rep stosl
-
- add $0x4, %esp /* Discard return address */
- popl %ecx
- popl %edx
- popl %esi
- jmp efi32_entry
-SYM_FUNC_END(efi32_stub_entry)
-#endif
-
-/*
- * EFI service pointer must be in %edi.
- *
- * The stack should represent the 32-bit calling convention.
- */
-SYM_FUNC_START_LOCAL(efi_enter32)
- /* Load firmware selector into data and stack segment registers */
- movl %edx, %ds
- movl %edx, %es
- movl %edx, %fs
- movl %edx, %gs
- movl %edx, %ss
-
- /* Reload pgtables */
- movl %cr3, %eax
- movl %eax, %cr3
-
- /* Disable paging */
- movl %cr0, %eax
- btrl $X86_CR0_PG_BIT, %eax
- movl %eax, %cr0
-
- /* Disable long mode via EFER */
- movl $MSR_EFER, %ecx
- rdmsr
- btrl $_EFER_LME, %eax
- wrmsr
-
- call *%edi
-
- /* We must preserve return value */
- movl %eax, %edi
-
- /*
- * Some firmware will return with interrupts enabled. Be sure to
- * disable them before we switch GDTs and IDTs.
- */
- cli
-
- lidtl 16(%ebx)
- lgdtl (%ebx)
-
- movl %cr4, %eax
- btsl $(X86_CR4_PAE_BIT), %eax
- movl %eax, %cr4
-
- movl %cr3, %eax
- movl %eax, %cr3
-
- movl $MSR_EFER, %ecx
- rdmsr
- btsl $_EFER_LME, %eax
- wrmsr
-
- xorl %eax, %eax
- lldt %ax
-
- pushl $__KERNEL_CS
- pushl %ebp
-
- /* Enable paging */
- movl %cr0, %eax
- btsl $X86_CR0_PG_BIT, %eax
- movl %eax, %cr0
- lret
-SYM_FUNC_END(efi_enter32)
-
-/*
- * This is the common EFI stub entry point for mixed mode.
- *
- * Arguments: %ecx image handle
- * %edx EFI system table pointer
- * %esi struct bootparams pointer (or NULL when not using
- * the EFI handover protocol)
- *
- * Since this is the point of no return for ordinary execution, no registers
- * are considered live except for the function parameters. [Note that the EFI
- * stub may still exit and return to the firmware using the Exit() EFI boot
- * service.]
- */
-SYM_FUNC_START_LOCAL(efi32_entry)
- call 1f
-1: pop %ebx
-
- /* Save firmware GDTR and code/data selectors */
- sgdtl (efi32_boot_gdt - 1b)(%ebx)
- movw %cs, (efi32_boot_cs - 1b)(%ebx)
- movw %ds, (efi32_boot_ds - 1b)(%ebx)
-
- /* Store firmware IDT descriptor */
- sidtl (efi32_boot_idt - 1b)(%ebx)
-
- /* Store firmware stack pointer */
- movl %esp, (efi32_boot_sp - 1b)(%ebx)
-
- /* Store boot arguments */
- leal (efi32_boot_args - 1b)(%ebx), %ebx
- movl %ecx, 0(%ebx)
- movl %edx, 4(%ebx)
- movl %esi, 8(%ebx)
- movb $0x0, 12(%ebx) // efi_is64
-
- /* Disable paging */
- movl %cr0, %eax
- btrl $X86_CR0_PG_BIT, %eax
- movl %eax, %cr0
-
- jmp startup_32
-SYM_FUNC_END(efi32_entry)
-
-/*
- * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
- * efi_system_table_32_t *sys_table)
- */
-SYM_FUNC_START(efi32_pe_entry)
- pushl %ebp
- movl %esp, %ebp
- pushl %ebx // save callee-save registers
- pushl %edi
-
- call verify_cpu // check for long mode support
- testl %eax, %eax
- movl $0x80000003, %eax // EFI_UNSUPPORTED
- jnz 2f
-
- movl 8(%ebp), %ecx // image_handle
- movl 12(%ebp), %edx // sys_table
- xorl %esi, %esi
- jmp efi32_entry // pass %ecx, %edx, %esi
- // no other registers remain live
-
-2: popl %edi // restore callee-save registers
- popl %ebx
- leave
- RET
-SYM_FUNC_END(efi32_pe_entry)
-
-#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
- .org efi32_stub_entry + 0x200
- .code64
-SYM_FUNC_START_NOALIGN(efi64_stub_entry)
- jmp efi_handover_entry
-SYM_FUNC_END(efi64_stub_entry)
-#endif
-
- .data
- .balign 8
-SYM_DATA_START_LOCAL(efi32_boot_gdt)
- .word 0
- .quad 0
-SYM_DATA_END(efi32_boot_gdt)
-
-SYM_DATA_START_LOCAL(efi32_boot_idt)
- .word 0
- .quad 0
-SYM_DATA_END(efi32_boot_idt)
-
-SYM_DATA_LOCAL(efi32_boot_cs, .word 0)
-SYM_DATA_LOCAL(efi32_boot_ds, .word 0)
-SYM_DATA_LOCAL(efi32_boot_sp, .long 0)
-SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
-SYM_DATA(efi_is64, .byte 1)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index bf4a10a5794f..eafd4f185e77 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -263,13 +263,6 @@ SYM_FUNC_START(startup_32)
* used to perform that far jump.
*/
leal rva(startup_64)(%ebp), %eax
-#ifdef CONFIG_EFI_MIXED
- cmpb $1, rva(efi_is64)(%ebp)
- je 1f
- leal rva(startup_64_mixed_mode)(%ebp), %eax
-1:
-#endif
-
pushl $__KERNEL_CS
pushl %eax
@@ -398,6 +391,11 @@ SYM_CODE_START(startup_64)
call sev_enable
#endif
+ /* Preserve only the CR4 bits that must be preserved, and clear the rest */
+ movq %cr4, %rax
+ andl $(X86_CR4_PAE | X86_CR4_MCE | X86_CR4_LA57), %eax
+ movq %rax, %cr4
+
/*
* configure_5level_paging() updates the number of paging levels using
* a trampoline in 32-bit addressable memory if the current number does
@@ -478,110 +476,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
jmp *%rax
SYM_FUNC_END(.Lrelocated)
-/*
- * This is the 32-bit trampoline that will be copied over to low memory. It
- * will be called using the ordinary 64-bit calling convention from code
- * running in 64-bit mode.
- *
- * Return address is at the top of the stack (might be above 4G).
- * The first argument (EDI) contains the address of the temporary PGD level
- * page table in 32-bit addressable memory which will be programmed into
- * register CR3.
- */
- .section ".rodata", "a", @progbits
-SYM_CODE_START(trampoline_32bit_src)
- /*
- * Preserve callee save 64-bit registers on the stack: this is
- * necessary because the architecture does not guarantee that GPRs will
- * retain their full 64-bit values across a 32-bit mode switch.
- */
- pushq %r15
- pushq %r14
- pushq %r13
- pushq %r12
- pushq %rbp
- pushq %rbx
-
- /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */
- movq %rsp, %rbx
- shrq $32, %rbx
-
- /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
- pushq $__KERNEL32_CS
- leaq 0f(%rip), %rax
- pushq %rax
- lretq
-
- /*
- * The 32-bit code below will do a far jump back to long mode and end
- * up here after reconfiguring the number of paging levels. First, the
- * stack pointer needs to be restored to its full 64-bit value before
- * the callee save register contents can be popped from the stack.
- */
-.Lret:
- shlq $32, %rbx
- orq %rbx, %rsp
-
- /* Restore the preserved 64-bit registers */
- popq %rbx
- popq %rbp
- popq %r12
- popq %r13
- popq %r14
- popq %r15
- retq
-
.code32
-0:
- /* Disable paging */
- movl %cr0, %eax
- btrl $X86_CR0_PG_BIT, %eax
- movl %eax, %cr0
-
- /* Point CR3 to the trampoline's new top level page table */
- movl %edi, %cr3
-
- /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
- movl $MSR_EFER, %ecx
- rdmsr
- btsl $_EFER_LME, %eax
- /* Avoid writing EFER if no change was made (for TDX guest) */
- jc 1f
- wrmsr
-1:
- /* Toggle CR4.LA57 */
- movl %cr4, %eax
- btcl $X86_CR4_LA57_BIT, %eax
- movl %eax, %cr4
-
- /* Enable paging again. */
- movl %cr0, %eax
- btsl $X86_CR0_PG_BIT, %eax
- movl %eax, %cr0
-
- /*
- * Return to the 64-bit calling code using LJMP rather than LRET, to
- * avoid the need for a 32-bit addressable stack. The destination
- * address will be adjusted after the template code is copied into a
- * 32-bit addressable buffer.
- */
-.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src)
-SYM_CODE_END(trampoline_32bit_src)
-
-/*
- * This symbol is placed right after trampoline_32bit_src() so its address can
- * be used to infer the size of the trampoline code.
- */
-SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src)
-
- /*
- * The trampoline code has a size limit.
- * Make sure we fail to compile if the trampoline code grows
- * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
- */
- .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
-
- .text
SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1:
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index dec961c6d16a..f03d59ea6e40 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -25,10 +25,6 @@
#include "efi.h"
#include <generated/compile.h>
-#include <linux/module.h>
-#include <linux/uts.h>
-#include <linux/utsname.h>
-#include <linux/ctype.h>
#include <generated/utsversion.h>
#include <generated/utsrelease.h>
@@ -119,13 +115,8 @@ char *skip_spaces(const char *str)
#include "../../../../lib/ctype.c"
#include "../../../../lib/cmdline.c"
-enum parse_mode {
- PARSE_MEMMAP,
- PARSE_EFI,
-};
-
static int
-parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode)
+parse_memmap(char *p, u64 *start, u64 *size)
{
char *oldp;
@@ -148,29 +139,11 @@ parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode)
*start = memparse(p + 1, &p);
return 0;
case '@':
- if (mode == PARSE_MEMMAP) {
- /*
- * memmap=nn@ss specifies usable region, should
- * be skipped
- */
- *size = 0;
- } else {
- u64 flags;
-
- /*
- * efi_fake_mem=nn@ss:attr the attr specifies
- * flags that might imply a soft-reservation.
- */
- *start = memparse(p + 1, &p);
- if (p && *p == ':') {
- p++;
- if (kstrtoull(p, 0, &flags) < 0)
- *size = 0;
- else if (flags & EFI_MEMORY_SP)
- return 0;
- }
- *size = 0;
- }
+ /*
+ * memmap=nn@ss specifies usable region, should
+ * be skipped
+ */
+ *size = 0;
fallthrough;
default:
/*
@@ -185,7 +158,7 @@ parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode)
return -EINVAL;
}
-static void mem_avoid_memmap(enum parse_mode mode, char *str)
+static void mem_avoid_memmap(char *str)
{
static int i;
@@ -200,7 +173,7 @@ static void mem_avoid_memmap(enum parse_mode mode, char *str)
if (k)
*k++ = 0;
- rc = parse_memmap(str, &start, &size, mode);
+ rc = parse_memmap(str, &start, &size);
if (rc < 0)
break;
str = k;
@@ -281,7 +254,7 @@ static void handle_mem_options(void)
break;
if (!strcmp(param, "memmap")) {
- mem_avoid_memmap(PARSE_MEMMAP, val);
+ mem_avoid_memmap(val);
} else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) {
parse_gb_huge_pages(param, val);
} else if (!strcmp(param, "mem")) {
@@ -295,8 +268,6 @@ static void handle_mem_options(void)
if (mem_size < mem_limit)
mem_limit = mem_size;
- } else if (!strcmp(param, "efi_fake_mem")) {
- mem_avoid_memmap(PARSE_EFI, val);
}
}
diff --git a/arch/x86/boot/compressed/la57toggle.S b/arch/x86/boot/compressed/la57toggle.S
new file mode 100644
index 000000000000..9ee002387eb1
--- /dev/null
+++ b/arch/x86/boot/compressed/la57toggle.S
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/boot.h>
+#include <asm/msr.h>
+#include <asm/processor-flags.h>
+#include "pgtable.h"
+
+/*
+ * This is the 32-bit trampoline that will be copied over to low memory. It
+ * will be called using the ordinary 64-bit calling convention from code
+ * running in 64-bit mode.
+ *
+ * Return address is at the top of the stack (might be above 4G).
+ * The first argument (EDI) contains the address of the temporary PGD level
+ * page table in 32-bit addressable memory which will be programmed into
+ * register CR3.
+ */
+
+ .section ".rodata", "a", @progbits
+SYM_CODE_START(trampoline_32bit_src)
+ /*
+ * Preserve callee save 64-bit registers on the stack: this is
+ * necessary because the architecture does not guarantee that GPRs will
+ * retain their full 64-bit values across a 32-bit mode switch.
+ */
+ pushq %r15
+ pushq %r14
+ pushq %r13
+ pushq %r12
+ pushq %rbp
+ pushq %rbx
+
+ /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */
+ movq %rsp, %rbx
+ shrq $32, %rbx
+
+ /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+ pushq $__KERNEL32_CS
+ leaq 0f(%rip), %rax
+ pushq %rax
+ lretq
+
+ /*
+ * The 32-bit code below will do a far jump back to long mode and end
+ * up here after reconfiguring the number of paging levels. First, the
+ * stack pointer needs to be restored to its full 64-bit value before
+ * the callee save register contents can be popped from the stack.
+ */
+.Lret:
+ shlq $32, %rbx
+ orq %rbx, %rsp
+
+ /* Restore the preserved 64-bit registers */
+ popq %rbx
+ popq %rbp
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ retq
+
+ .code32
+0:
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ /* Point CR3 to the trampoline's new top level page table */
+ movl %edi, %cr3
+
+ /* Set EFER.LME=1 as a precaution in case the hypervisor pulls the rug */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ /* Avoid writing EFER if no change was made (for TDX guest) */
+ jc 1f
+ wrmsr
+1:
+ /* Toggle CR4.LA57 */
+ movl %cr4, %eax
+ btcl $X86_CR4_LA57_BIT, %eax
+ movl %eax, %cr4
+
+ /* Enable paging again. */
+ movl %cr0, %eax
+ btsl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ /*
+ * Return to the 64-bit calling code using LJMP rather than LRET, to
+ * avoid the need for a 32-bit addressable stack. The destination
+ * address will be adjusted after the template code is copied into a
+ * 32-bit addressable buffer.
+ */
+.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src)
+SYM_CODE_END(trampoline_32bit_src)
+
+/*
+ * This symbol is placed right after trampoline_32bit_src() so its address can
+ * be used to infer the size of the trampoline code.
+ */
+SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src)
+
+ /*
+ * The trampoline code has a size limit.
+ * Make sure we fail to compile if the trampoline code grows
+ * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
+ */
+ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b70e4a21c15f..1cdcd4aaf395 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -235,7 +235,7 @@ static void handle_relocations(void *output, unsigned long output_len,
/*
* Process relocations: 32 bit relocations first then 64 bit after.
- * Three sets of binary relocations are added to the end of the kernel
+ * Two sets of binary relocations are added to the end of the kernel
* before compression. Each relocation table entry is the kernel
* address of the location which needs to be updated stored as a
* 32-bit value which is sign extended to 64 bits.
@@ -245,8 +245,6 @@ static void handle_relocations(void *output, unsigned long output_len,
* kernel bits...
* 0 - zero terminator for 64 bit relocations
* 64 bit relocation repeated
- * 0 - zero terminator for inverse 32 bit relocations
- * 32 bit inverse relocation repeated
* 0 - zero terminator for 32 bit relocations
* 32 bit relocation repeated
*
@@ -263,16 +261,6 @@ static void handle_relocations(void *output, unsigned long output_len,
*(uint32_t *)ptr += delta;
}
#ifdef CONFIG_X86_64
- while (*--reloc) {
- long extended = *reloc;
- extended += map;
-
- ptr = (unsigned long)extended;
- if (ptr < min_addr || ptr > max_addr)
- error("inverse 32-bit relocation outside of kernel!\n");
-
- *(int32_t *)ptr -= delta;
- }
for (reloc--; *reloc; reloc--) {
long extended = *reloc;
extended += map;
@@ -385,6 +373,19 @@ static void parse_mem_encrypt(struct setup_header *hdr)
hdr->xloadflags |= XLF_MEM_ENCRYPTION;
}
+static void early_sev_detect(void)
+{
+ /*
+ * Accessing video memory causes guest termination because
+ * the boot stage2 #VC handler of SEV-ES/SNP guests does not
+ * support MMIO handling and kexec -c adds screen_info to the
+ * boot parameters passed to the kexec kernel, which causes
+ * console output to be dumped to both video and serial.
+ */
+ if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
+ lines = cols = 0;
+}
+
/*
* The compressed kernel image (ZO), has been moved so that its position
* is against the end of the buffer used to hold the uncompressed kernel
@@ -440,6 +441,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
*/
early_tdx_detect();
+ early_sev_detect();
+
console_init();
/*
@@ -511,7 +514,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
if (init_unaccepted_memory()) {
debug_putstr("Accepting memory... ");
- accept_memory(__pa(output), __pa(output) + needed_size);
+ accept_memory(__pa(output), needed_size);
}
entry_offset = decompress_kernel(output, virt_addr, error);
@@ -531,8 +534,3 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
return output + entry_offset;
}
-
-void __fortify_panic(const u8 reason, size_t avail, size_t size)
-{
- error("detected buffer overflow");
-}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index b353a7be380c..dd8d1a85f671 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -256,6 +256,6 @@ static inline bool init_unaccepted_memory(void) { return false; }
/* Defined in EFI stub */
extern struct efi_unaccepted_memory *unaccepted_table;
-void accept_memory(phys_addr_t start, phys_addr_t end);
+void accept_memory(phys_addr_t start, unsigned long size);
#endif /* BOOT_COMPRESSED_MISC_H */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index c882e1f67af0..d8c5de40669d 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "misc.h"
#include <asm/bootparam.h>
+#include <asm/bootparam_utils.h>
#include <asm/e820/types.h>
#include <asm/processor.h>
#include "pgtable.h"
@@ -107,6 +108,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
bool l5_required = false;
/* Initialize boot_params. Required for cmdline_find_option_bool(). */
+ sanitize_boot_params(bp);
boot_params_ptr = bp;
/*
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index ec71846d28c9..bb55934c1cee 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -127,7 +127,35 @@ static bool fault_in_kernel_space(unsigned long address)
#include "../../lib/insn.c"
/* Include code for early handlers */
-#include "../../kernel/sev-shared.c"
+#include "../../coco/sev/shared.c"
+
+static struct svsm_ca *svsm_get_caa(void)
+{
+ return boot_svsm_caa;
+}
+
+static u64 svsm_get_caa_pa(void)
+{
+ return boot_svsm_caa_pa;
+}
+
+static int svsm_perform_call_protocol(struct svsm_call *call)
+{
+ struct ghcb *ghcb;
+ int ret;
+
+ if (boot_ghcb)
+ ghcb = boot_ghcb;
+ else
+ ghcb = NULL;
+
+ do {
+ ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
+ : svsm_perform_msr_protocol(call);
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
bool sev_snp_enabled(void)
{
@@ -145,8 +173,8 @@ static void __page_state_change(unsigned long paddr, enum psc_op op)
* If private -> shared then invalidate the page before requesting the
* state change in the RMP table.
*/
- if (op == SNP_PAGE_STATE_SHARED && pvalidate(paddr, RMP_PG_SIZE_4K, 0))
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+ if (op == SNP_PAGE_STATE_SHARED)
+ pvalidate_4k_page(paddr, paddr, false);
/* Issue VMGEXIT to change the page state in RMP table. */
sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
@@ -161,8 +189,8 @@ static void __page_state_change(unsigned long paddr, enum psc_op op)
* Now that page state is changed in the RMP table, validate it so that it is
* consistent with the RMP entry.
*/
- if (op == SNP_PAGE_STATE_PRIVATE && pvalidate(paddr, RMP_PG_SIZE_4K, 1))
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+ if (op == SNP_PAGE_STATE_PRIVATE)
+ pvalidate_4k_page(paddr, paddr, true);
}
void snp_set_page_private(unsigned long paddr)
@@ -256,6 +284,16 @@ void sev_es_shutdown_ghcb(void)
error("SEV-ES CPU Features missing.");
/*
+ * This denotes whether to use the GHCB MSR protocol or the GHCB
+ * shared page to perform a GHCB request. Since the GHCB page is
+ * being changed to encrypted, it can't be used to perform GHCB
+ * requests. Clear the boot_ghcb variable so that the GHCB MSR
+ * protocol is used to change the GHCB page over to an encrypted
+ * page.
+ */
+ boot_ghcb = NULL;
+
+ /*
* GHCB Page must be flushed from the cache and mapped encrypted again.
* Otherwise the running kernel will see strange cache effects when
* trying to use that page.
@@ -335,26 +373,6 @@ finish:
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}
-static void enforce_vmpl0(void)
-{
- u64 attrs;
- int err;
-
- /*
- * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically
- * higher) privilege level. Here, clear the VMPL1 permission mask of the
- * GHCB page. If the guest is not running at VMPL0, this will fail.
- *
- * If the guest is running at VMPL0, it will succeed. Even if that operation
- * modifies permission bits, it is still ok to do so currently because Linux
- * SNP guests are supported only on VMPL0 so VMPL1 or higher permission masks
- * changing is a don't-care.
- */
- attrs = 1;
- if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, attrs))
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0);
-}
-
/*
* SNP_FEATURES_IMPL_REQ is the mask of SNP features that will need
* guest side implementation for proper functioning of the guest. If any
@@ -383,7 +401,8 @@ static void enforce_vmpl0(void)
* by the guest kernel. As and when a new feature is implemented in the
* guest kernel, a corresponding bit should be added to the mask.
*/
-#define SNP_FEATURES_PRESENT MSR_AMD64_SNP_DEBUG_SWAP
+#define SNP_FEATURES_PRESENT (MSR_AMD64_SNP_DEBUG_SWAP | \
+ MSR_AMD64_SNP_SECURE_TSC)
u64 snp_get_unsupported_features(u64 status)
{
@@ -413,6 +432,92 @@ void snp_check_features(void)
}
}
+/* Search for Confidential Computing blob in the EFI config table. */
+static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp)
+{
+ unsigned long cfg_table_pa;
+ unsigned int cfg_table_len;
+ int ret;
+
+ ret = efi_get_conf_table(bp, &cfg_table_pa, &cfg_table_len);
+ if (ret)
+ return NULL;
+
+ return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, cfg_table_pa,
+ cfg_table_len,
+ EFI_CC_BLOB_GUID);
+}
+
+/*
+ * Initial set up of SNP relies on information provided by the
+ * Confidential Computing blob, which can be passed to the boot kernel
+ * by firmware/bootloader in the following ways:
+ *
+ * - via an entry in the EFI config table
+ * - via a setup_data structure, as defined by the Linux Boot Protocol
+ *
+ * Scan for the blob in that order.
+ */
+static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+{
+ struct cc_blob_sev_info *cc_info;
+
+ cc_info = find_cc_blob_efi(bp);
+ if (cc_info)
+ goto found_cc_info;
+
+ cc_info = find_cc_blob_setup_data(bp);
+ if (!cc_info)
+ return NULL;
+
+found_cc_info:
+ if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+
+ return cc_info;
+}
+
+/*
+ * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks
+ * will verify the SNP CPUID/MSR bits.
+ */
+static bool early_snp_init(struct boot_params *bp)
+{
+ struct cc_blob_sev_info *cc_info;
+
+ if (!bp)
+ return false;
+
+ cc_info = find_cc_blob(bp);
+ if (!cc_info)
+ return false;
+
+ /*
+ * If a SNP-specific Confidential Computing blob is present, then
+ * firmware/bootloader have indicated SNP support. Verifying this
+ * involves CPUID checks which will be more reliable if the SNP
+ * CPUID table is used. See comments over setup_cpuid_table() for
+ * more details.
+ */
+ setup_cpuid_table(cc_info);
+
+ /*
+ * Record the SVSM Calling Area (CA) address if the guest is not
+ * running at VMPL0. The CA will be used to communicate with the
+ * SVSM and request its services.
+ */
+ svsm_setup_ca(cc_info);
+
+ /*
+ * Pass run-time kernel a pointer to CC info via boot_params so EFI
+ * config table doesn't need to be searched again during early startup
+ * phase.
+ */
+ bp->cc_blob_address = (u32)(unsigned long)cc_info;
+
+ return true;
+}
+
/*
* sev_check_cpu_support - Check for SEV support in the CPU capabilities
*
@@ -463,7 +568,7 @@ void sev_enable(struct boot_params *bp)
bp->cc_blob_address = 0;
/*
- * Do an initial SEV capability check before snp_init() which
+ * Do an initial SEV capability check before early_snp_init() which
* loads the CPUID page and the same checks afterwards are done
* without the hypervisor and are trustworthy.
*
@@ -478,7 +583,7 @@ void sev_enable(struct boot_params *bp)
* Setup/preliminary detection of SNP. This will be sanity-checked
* against CPUID/MSR values later.
*/
- snp = snp_init(bp);
+ snp = early_snp_init(bp);
/* Now repeat the checks with the SNP CPUID table. */
@@ -506,10 +611,32 @@ void sev_enable(struct boot_params *bp)
* features.
*/
if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
- if (!(get_hv_features() & GHCB_HV_FT_SNP))
+ u64 hv_features;
+ int ret;
+
+ hv_features = get_hv_features();
+ if (!(hv_features & GHCB_HV_FT_SNP))
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
- enforce_vmpl0();
+ /*
+ * Enforce running at VMPL0 or with an SVSM.
+ *
+ * Use RMPADJUST (see the rmpadjust() function for a description of
+ * what the instruction does) to update the VMPL1 permissions of a
+ * page. If the guest is running at VMPL0, this will succeed. If the
+ * guest is running at any other VMPL, this will fail. Linux SNP guests
+ * only ever run at a single VMPL level so permission mask changes of a
+ * lesser-privileged VMPL are a don't-care.
+ */
+ ret = rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, 1);
+
+ /*
+ * Running at VMPL0 is not required if an SVSM is present and the hypervisor
+ * supports the required SVSM GHCB events.
+ */
+ if (ret &&
+ !(snp_vmpl && (hv_features & GHCB_HV_FT_SNP_MULTI_VMPL)))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0);
}
if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
@@ -535,85 +662,6 @@ u64 sev_get_status(void)
return m.q;
}
-/* Search for Confidential Computing blob in the EFI config table. */
-static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp)
-{
- unsigned long cfg_table_pa;
- unsigned int cfg_table_len;
- int ret;
-
- ret = efi_get_conf_table(bp, &cfg_table_pa, &cfg_table_len);
- if (ret)
- return NULL;
-
- return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, cfg_table_pa,
- cfg_table_len,
- EFI_CC_BLOB_GUID);
-}
-
-/*
- * Initial set up of SNP relies on information provided by the
- * Confidential Computing blob, which can be passed to the boot kernel
- * by firmware/bootloader in the following ways:
- *
- * - via an entry in the EFI config table
- * - via a setup_data structure, as defined by the Linux Boot Protocol
- *
- * Scan for the blob in that order.
- */
-static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
-{
- struct cc_blob_sev_info *cc_info;
-
- cc_info = find_cc_blob_efi(bp);
- if (cc_info)
- goto found_cc_info;
-
- cc_info = find_cc_blob_setup_data(bp);
- if (!cc_info)
- return NULL;
-
-found_cc_info:
- if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
- sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
-
- return cc_info;
-}
-
-/*
- * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks
- * will verify the SNP CPUID/MSR bits.
- */
-bool snp_init(struct boot_params *bp)
-{
- struct cc_blob_sev_info *cc_info;
-
- if (!bp)
- return false;
-
- cc_info = find_cc_blob(bp);
- if (!cc_info)
- return false;
-
- /*
- * If a SNP-specific Confidential Computing blob is present, then
- * firmware/bootloader have indicated SNP support. Verifying this
- * involves CPUID checks which will be more reliable if the SNP
- * CPUID table is used. See comments over snp_setup_cpuid_table() for
- * more details.
- */
- setup_cpuid_table(cc_info);
-
- /*
- * Pass run-time kernel a pointer to CC info via boot_params so EFI
- * config table doesn't need to be searched again during early startup
- * phase.
- */
- bp->cc_blob_address = (u32)(unsigned long)cc_info;
-
- return true;
-}
-
void sev_prep_identity_maps(unsigned long top_level_pgt)
{
/*
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index 083ec6d7722a..3b2bc61c9408 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -48,7 +48,7 @@ SECTIONS
*(.data)
*(.data.*)
- /* Add 4 bytes of extra space for a CRC-32 checksum */
+ /* Add 4 bytes of extra space for the obsolete CRC-32 checksum */
. = ALIGN(. + 4, 0x200);
_edata = . ;
}