diff options
Diffstat (limited to 'arch/x86/boot/compressed')
-rw-r--r-- | arch/x86/boot/compressed/Makefile | 18 | ||||
-rw-r--r-- | arch/x86/boot/compressed/acpi.c | 2 | ||||
-rw-r--r-- | arch/x86/boot/compressed/cmdline.c | 2 | ||||
-rw-r--r-- | arch/x86/boot/compressed/efi.c | 2 | ||||
-rw-r--r-- | arch/x86/boot/compressed/efi.h | 9 | ||||
-rw-r--r-- | arch/x86/boot/compressed/efi_mixed.S | 29 | ||||
-rw-r--r-- | arch/x86/boot/compressed/head_64.S | 5 | ||||
-rw-r--r-- | arch/x86/boot/compressed/ident_map_64.c | 8 | ||||
-rw-r--r-- | arch/x86/boot/compressed/kaslr.c | 47 | ||||
-rw-r--r-- | arch/x86/boot/compressed/misc.c | 78 | ||||
-rw-r--r-- | arch/x86/boot/compressed/misc.h | 5 | ||||
-rw-r--r-- | arch/x86/boot/compressed/pgtable_64.c | 3 | ||||
-rw-r--r-- | arch/x86/boot/compressed/sev.c | 276 |
13 files changed, 287 insertions, 197 deletions
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f19c038409aa..606c74f27459 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -17,15 +17,6 @@ # (see scripts/Makefile.lib size_append) # compressed vmlinux.bin.all + u32 size of vmlinux.bin.all -# Sanitizer runtimes are unavailable and cannot be linked for early boot code. -KASAN_SANITIZE := n -KCSAN_SANITIZE := n -KMSAN_SANITIZE := n -OBJECT_FILES_NON_STANDARD := y - -# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. -KCOV_INSTRUMENT := n - targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst @@ -34,6 +25,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ # avoid errors with '-march=i386', and future flags may depend on the target to # be valid. KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS) +KBUILD_CFLAGS += -std=gnu11 KBUILD_CFLAGS += -fno-strict-aliasing -fPIE KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING @@ -59,8 +51,6 @@ KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ -GCOV_PROFILE := n -UBSAN_SANITIZE :=n KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE) KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info) @@ -84,7 +74,7 @@ LDFLAGS_vmlinux += -T hostprogs := mkpiggy HOST_EXTRACFLAGS += -I$(srctree)/tools/include -sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' +sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__start_rodata\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' quiet_cmd_voffset = VOFFSET $@ cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ @@ -116,9 +106,9 @@ vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o -vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a +vmlinux-libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a -$(obj)/vmlinux: $(vmlinux-objs-y) FORCE +$(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE $(call if_changed,ld) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 18d15d1ce87d..f196b1d1ddf8 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -5,6 +5,8 @@ #include "../string.h" #include "efi.h" +#include <asm/bootparam.h> + #include <linux/numa.h> /* diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index c1bb180973ea..e162d7f59cc5 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include "misc.h" +#include <asm/bootparam.h> + static unsigned long fs; static inline void set_fs(unsigned long seg) { diff --git a/arch/x86/boot/compressed/efi.c b/arch/x86/boot/compressed/efi.c index 6edd034b0b30..f2e50f9758e6 100644 --- a/arch/x86/boot/compressed/efi.c +++ b/arch/x86/boot/compressed/efi.c @@ -7,6 +7,8 @@ #include "misc.h" +#include <asm/bootparam.h> + /** * efi_get_type - Given a pointer to boot_params, determine the type of EFI environment. * diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h index 866c0af8b5b9..b22300970f97 100644 --- a/arch/x86/boot/compressed/efi.h +++ b/arch/x86/boot/compressed/efi.h @@ -97,15 +97,6 @@ typedef struct { u32 tables; } efi_system_table_32_t; -/* kexec external ABI */ -struct efi_setup_data { - u64 fw_vendor; - u64 __unused; - u64 tables; - u64 smbios; - u64 reserved[8]; -}; - struct efi_unaccepted_memory { u32 version; u32 unit_size; diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index f4e22ef774ab..876fc6d46a13 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -15,10 +15,12 @@ */ #include <linux/linkage.h> +#include <asm/asm-offsets.h> #include <asm/msr.h> #include <asm/page_types.h> #include <asm/processor-flags.h> #include <asm/segment.h> +#include <asm/setup.h> .code64 .text @@ -49,6 +51,11 @@ SYM_FUNC_START(startup_64_mixed_mode) lea efi32_boot_args(%rip), %rdx mov 0(%rdx), %edi mov 4(%rdx), %esi + + /* Switch to the firmware's stack */ + movl efi32_boot_sp(%rip), %esp + andl $~7, %esp + #ifdef CONFIG_EFI_HANDOVER_PROTOCOL mov 8(%rdx), %edx // saved bootparams pointer test %edx, %edx @@ -144,6 +151,7 @@ SYM_FUNC_END(__efi64_thunk) SYM_FUNC_START(efi32_stub_entry) call 1f 1: popl %ecx + leal (efi32_boot_args - 1b)(%ecx), %ebx /* Clear BSS */ xorl %eax, %eax @@ -158,6 +166,7 @@ SYM_FUNC_START(efi32_stub_entry) popl %ecx popl %edx popl %esi + movl %esi, 8(%ebx) jmp efi32_entry SYM_FUNC_END(efi32_stub_entry) #endif @@ -234,8 +243,6 @@ SYM_FUNC_END(efi_enter32) * * Arguments: %ecx image handle * %edx EFI system table pointer - * %esi struct bootparams pointer (or NULL when not using - * the EFI handover protocol) * * Since this is the point of no return for ordinary execution, no registers * are considered live except for the function parameters. [Note that the EFI @@ -254,13 +261,25 @@ SYM_FUNC_START_LOCAL(efi32_entry) /* Store firmware IDT descriptor */ sidtl (efi32_boot_idt - 1b)(%ebx) + /* Store firmware stack pointer */ + movl %esp, (efi32_boot_sp - 1b)(%ebx) + /* Store boot arguments */ leal (efi32_boot_args - 1b)(%ebx), %ebx movl %ecx, 0(%ebx) movl %edx, 4(%ebx) - movl %esi, 8(%ebx) movb $0x0, 12(%ebx) // efi_is64 + /* + * Allocate some memory for a temporary struct boot_params, which only + * needs the minimal pieces that startup_32() relies on. + */ + subl $PARAM_SIZE, %esp + movl %esp, %esi + movl $PAGE_SIZE, BP_kernel_alignment(%esi) + movl $_end - 1b, BP_init_size(%esi) + subl $startup_32 - 1b, BP_init_size(%esi) + /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax @@ -286,8 +305,7 @@ SYM_FUNC_START(efi32_pe_entry) movl 8(%ebp), %ecx // image_handle movl 12(%ebp), %edx // sys_table - xorl %esi, %esi - jmp efi32_entry // pass %ecx, %edx, %esi + jmp efi32_entry // pass %ecx, %edx // no other registers remain live 2: popl %edi // restore callee-save registers @@ -318,5 +336,6 @@ SYM_DATA_END(efi32_boot_idt) SYM_DATA_LOCAL(efi32_boot_cs, .word 0) SYM_DATA_LOCAL(efi32_boot_ds, .word 0) +SYM_DATA_LOCAL(efi32_boot_sp, .long 0) SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) SYM_DATA(efi_is64, .byte 1) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index bf4a10a5794f..1dcb794c5479 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -398,6 +398,11 @@ SYM_CODE_START(startup_64) call sev_enable #endif + /* Preserve only the CR4 bits that must be preserved, and clear the rest */ + movq %cr4, %rax + andl $(X86_CR4_PAE | X86_CR4_MCE | X86_CR4_LA57), %eax + movq %rax, %cr4 + /* * configure_5level_paging() updates the number of paging levels using * a trampoline in 32-bit addressable memory if the current number does diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index d040080d7edb..dfb9c2deb77c 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -8,8 +8,8 @@ * Copyright (C) 2016 Kees Cook */ -/* No PAGE_TABLE_ISOLATION support needed either: */ -#undef CONFIG_PAGE_TABLE_ISOLATION +/* No MITIGATION_PAGE_TABLE_ISOLATION support needed either: */ +#undef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION #include "error.h" #include "misc.h" @@ -284,7 +284,7 @@ static int set_clr_page_flags(struct x86_mapping_info *info, pudp = pud_offset(p4dp, address); pmdp = pmd_offset(pudp, address); - if (pmd_large(*pmdp)) + if (pmd_leaf(*pmdp)) ptep = split_large_pmd(info, pmdp, address); else ptep = pte_offset_kernel(pmdp, address); @@ -389,5 +389,5 @@ void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code) void do_boot_nmi_trap(struct pt_regs *regs, unsigned long error_code) { - /* Empty handler to ignore NMI during early boot */ + spurious_nmi_count++; } diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index dec961c6d16a..f03d59ea6e40 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -25,10 +25,6 @@ #include "efi.h" #include <generated/compile.h> -#include <linux/module.h> -#include <linux/uts.h> -#include <linux/utsname.h> -#include <linux/ctype.h> #include <generated/utsversion.h> #include <generated/utsrelease.h> @@ -119,13 +115,8 @@ char *skip_spaces(const char *str) #include "../../../../lib/ctype.c" #include "../../../../lib/cmdline.c" -enum parse_mode { - PARSE_MEMMAP, - PARSE_EFI, -}; - static int -parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode) +parse_memmap(char *p, u64 *start, u64 *size) { char *oldp; @@ -148,29 +139,11 @@ parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode) *start = memparse(p + 1, &p); return 0; case '@': - if (mode == PARSE_MEMMAP) { - /* - * memmap=nn@ss specifies usable region, should - * be skipped - */ - *size = 0; - } else { - u64 flags; - - /* - * efi_fake_mem=nn@ss:attr the attr specifies - * flags that might imply a soft-reservation. - */ - *start = memparse(p + 1, &p); - if (p && *p == ':') { - p++; - if (kstrtoull(p, 0, &flags) < 0) - *size = 0; - else if (flags & EFI_MEMORY_SP) - return 0; - } - *size = 0; - } + /* + * memmap=nn@ss specifies usable region, should + * be skipped + */ + *size = 0; fallthrough; default: /* @@ -185,7 +158,7 @@ parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode) return -EINVAL; } -static void mem_avoid_memmap(enum parse_mode mode, char *str) +static void mem_avoid_memmap(char *str) { static int i; @@ -200,7 +173,7 @@ static void mem_avoid_memmap(enum parse_mode mode, char *str) if (k) *k++ = 0; - rc = parse_memmap(str, &start, &size, mode); + rc = parse_memmap(str, &start, &size); if (rc < 0) break; str = k; @@ -281,7 +254,7 @@ static void handle_mem_options(void) break; if (!strcmp(param, "memmap")) { - mem_avoid_memmap(PARSE_MEMMAP, val); + mem_avoid_memmap(val); } else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) { parse_gb_huge_pages(param, val); } else if (!strcmp(param, "mem")) { @@ -295,8 +268,6 @@ static void handle_mem_options(void) if (mem_size < mem_limit) mem_limit = mem_size; - } else if (!strcmp(param, "efi_fake_mem")) { - mem_avoid_memmap(PARSE_EFI, val); } } diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index b99e08e6815b..0d37420cad02 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -52,6 +52,7 @@ struct port_io_ops pio_ops; memptr free_mem_ptr; memptr free_mem_end_ptr; +int spurious_nmi_count; static char *vidmem; static int vidport; @@ -164,21 +165,34 @@ void __putstr(const char *s) outb(0xff & (pos >> 1), vidport+1); } -void __puthex(unsigned long value) +static noinline void __putnum(unsigned long value, unsigned int base, + int mindig) { - char alpha[2] = "0"; - int bits; + char buf[8*sizeof(value)+1]; + char *p; - for (bits = sizeof(value) * 8 - 4; bits >= 0; bits -= 4) { - unsigned long digit = (value >> bits) & 0xf; + p = buf + sizeof(buf); + *--p = '\0'; - if (digit < 0xA) - alpha[0] = '0' + digit; - else - alpha[0] = 'a' + (digit - 0xA); + while (mindig-- > 0 || value) { + unsigned char digit = value % base; + digit += (digit >= 10) ? ('a'-10) : '0'; + *--p = digit; - __putstr(alpha); + value /= base; } + + __putstr(p); +} + +void __puthex(unsigned long value) +{ + __putnum(value, 16, sizeof(value)*2); +} + +void __putdec(unsigned long value) +{ + __putnum(value, 10, 1); } #ifdef CONFIG_X86_NEED_RELOCS @@ -330,6 +344,7 @@ static size_t parse_elf(void *output) return ehdr.e_entry - LOAD_PHYSICAL_ADDR; } +const unsigned long kernel_text_size = VO___start_rodata - VO__text; const unsigned long kernel_total_size = VO__end - VO__text; static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); @@ -358,6 +373,32 @@ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, } /* + * Set the memory encryption xloadflag based on the mem_encrypt= command line + * parameter, if provided. + */ +static void parse_mem_encrypt(struct setup_header *hdr) +{ + int on = cmdline_find_option_bool("mem_encrypt=on"); + int off = cmdline_find_option_bool("mem_encrypt=off"); + + if (on > off) + hdr->xloadflags |= XLF_MEM_ENCRYPTION; +} + +static void early_sev_detect(void) +{ + /* + * Accessing video memory causes guest termination because + * the boot stage2 #VC handler of SEV-ES/SNP guests does not + * support MMIO handling and kexec -c adds screen_info to the + * boot parameters passed to the kexec kernel, which causes + * console output to be dumped to both video and serial. + */ + if (sev_status & MSR_AMD64_SEV_ES_ENABLED) + lines = cols = 0; +} + +/* * The compressed kernel image (ZO), has been moved so that its position * is against the end of the buffer used to hold the uncompressed kernel * image (VO) and the execution environment (.bss, .brk), which makes sure @@ -387,6 +428,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) /* Clear flags intended for solely in-kernel use. */ boot_params_ptr->hdr.loadflags &= ~KASLR_FLAG; + parse_mem_encrypt(&boot_params_ptr->hdr); + sanitize_boot_params(boot_params_ptr); if (boot_params_ptr->screen_info.orig_video_mode == 7) { @@ -410,6 +453,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) */ early_tdx_detect(); + early_sev_detect(); + console_init(); /* @@ -481,7 +526,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) if (init_unaccepted_memory()) { debug_putstr("Accepting memory... "); - accept_memory(__pa(output), __pa(output) + needed_size); + accept_memory(__pa(output), needed_size); } entry_offset = decompress_kernel(output, virt_addr, error); @@ -493,10 +538,11 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) /* Disable exception handling before booting the kernel */ cleanup_exception_handling(); - return output + entry_offset; -} + if (spurious_nmi_count) { + error_putstr("Spurious early NMIs ignored: "); + error_putdec(spurious_nmi_count); + error_putstr("\n"); + } -void fortify_panic(const char *name) -{ - error("detected buffer overflow"); + return output + entry_offset; } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index bc2f0f17fb90..dd8d1a85f671 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -59,12 +59,15 @@ extern char _head[], _end[]; /* misc.c */ extern memptr free_mem_ptr; extern memptr free_mem_end_ptr; +extern int spurious_nmi_count; void *malloc(int size); void free(void *where); void __putstr(const char *s); void __puthex(unsigned long value); +void __putdec(unsigned long value); #define error_putstr(__x) __putstr(__x) #define error_puthex(__x) __puthex(__x) +#define error_putdec(__x) __putdec(__x) #ifdef CONFIG_X86_VERBOSE_BOOTUP @@ -253,6 +256,6 @@ static inline bool init_unaccepted_memory(void) { return false; } /* Defined in EFI stub */ extern struct efi_unaccepted_memory *unaccepted_table; -void accept_memory(phys_addr_t start, phys_addr_t end); +void accept_memory(phys_addr_t start, unsigned long size); #endif /* BOOT_COMPRESSED_MISC_H */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 51f957b24ba7..d8c5de40669d 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "misc.h" +#include <asm/bootparam.h> +#include <asm/bootparam_utils.h> #include <asm/e820/types.h> #include <asm/processor.h> #include "pgtable.h" @@ -106,6 +108,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ + sanitize_boot_params(bp); boot_params_ptr = bp; /* diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 454acd7a2daf..bb55934c1cee 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -12,6 +12,7 @@ */ #include "misc.h" +#include <asm/bootparam.h> #include <asm/pgtable_types.h> #include <asm/sev.h> #include <asm/trapnr.h> @@ -116,6 +117,9 @@ static bool fault_in_kernel_space(unsigned long address) #undef __init #define __init +#undef __head +#define __head + #define __BOOT_COMPRESSED /* Basic instruction decoding support needed */ @@ -123,7 +127,35 @@ static bool fault_in_kernel_space(unsigned long address) #include "../../lib/insn.c" /* Include code for early handlers */ -#include "../../kernel/sev-shared.c" +#include "../../coco/sev/shared.c" + +static struct svsm_ca *svsm_get_caa(void) +{ + return boot_svsm_caa; +} + +static u64 svsm_get_caa_pa(void) +{ + return boot_svsm_caa_pa; +} + +static int svsm_perform_call_protocol(struct svsm_call *call) +{ + struct ghcb *ghcb; + int ret; + + if (boot_ghcb) + ghcb = boot_ghcb; + else + ghcb = NULL; + + do { + ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call) + : svsm_perform_msr_protocol(call); + } while (ret == -EAGAIN); + + return ret; +} bool sev_snp_enabled(void) { @@ -141,8 +173,8 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) * If private -> shared then invalidate the page before requesting the * state change in the RMP table. */ - if (op == SNP_PAGE_STATE_SHARED && pvalidate(paddr, RMP_PG_SIZE_4K, 0)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + if (op == SNP_PAGE_STATE_SHARED) + pvalidate_4k_page(paddr, paddr, false); /* Issue VMGEXIT to change the page state in RMP table. */ sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); @@ -157,8 +189,8 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) * Now that page state is changed in the RMP table, validate it so that it is * consistent with the RMP entry. */ - if (op == SNP_PAGE_STATE_PRIVATE && pvalidate(paddr, RMP_PG_SIZE_4K, 1)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + if (op == SNP_PAGE_STATE_PRIVATE) + pvalidate_4k_page(paddr, paddr, true); } void snp_set_page_private(unsigned long paddr) @@ -252,6 +284,16 @@ void sev_es_shutdown_ghcb(void) error("SEV-ES CPU Features missing."); /* + * This denotes whether to use the GHCB MSR protocol or the GHCB + * shared page to perform a GHCB request. Since the GHCB page is + * being changed to encrypted, it can't be used to perform GHCB + * requests. Clear the boot_ghcb variable so that the GHCB MSR + * protocol is used to change the GHCB page over to an encrypted + * page. + */ + boot_ghcb = NULL; + + /* * GHCB Page must be flushed from the cache and mapped encrypted again. * Otherwise the running kernel will see strange cache effects when * trying to use that page. @@ -304,6 +346,10 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) if (result != ES_OK) goto finish; + result = vc_check_opcode_bytes(&ctxt, exit_code); + if (result != ES_OK) + goto finish; + switch (exit_code) { case SVM_EXIT_RDTSC: case SVM_EXIT_RDTSCP: @@ -327,26 +373,6 @@ finish: sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); } -static void enforce_vmpl0(void) -{ - u64 attrs; - int err; - - /* - * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically - * higher) privilege level. Here, clear the VMPL1 permission mask of the - * GHCB page. If the guest is not running at VMPL0, this will fail. - * - * If the guest is running at VMPL0, it will succeed. Even if that operation - * modifies permission bits, it is still ok to do so currently because Linux - * SNP guests are supported only on VMPL0 so VMPL1 or higher permission masks - * changing is a don't-care. - */ - attrs = 1; - if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, attrs)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0); -} - /* * SNP_FEATURES_IMPL_REQ is the mask of SNP features that will need * guest side implementation for proper functioning of the guest. If any @@ -365,7 +391,7 @@ static void enforce_vmpl0(void) MSR_AMD64_SNP_VMPL_SSS | \ MSR_AMD64_SNP_SECURE_TSC | \ MSR_AMD64_SNP_VMGEXIT_PARAM | \ - MSR_AMD64_SNP_VMSA_REG_PROTECTION | \ + MSR_AMD64_SNP_VMSA_REG_PROT | \ MSR_AMD64_SNP_RESERVED_BIT13 | \ MSR_AMD64_SNP_RESERVED_BIT15 | \ MSR_AMD64_SNP_RESERVED_MASK) @@ -375,7 +401,8 @@ static void enforce_vmpl0(void) * by the guest kernel. As and when a new feature is implemented in the * guest kernel, a corresponding bit should be added to the mask. */ -#define SNP_FEATURES_PRESENT MSR_AMD64_SNP_DEBUG_SWAP +#define SNP_FEATURES_PRESENT (MSR_AMD64_SNP_DEBUG_SWAP | \ + MSR_AMD64_SNP_SECURE_TSC) u64 snp_get_unsupported_features(u64 status) { @@ -405,6 +432,92 @@ void snp_check_features(void) } } +/* Search for Confidential Computing blob in the EFI config table. */ +static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp) +{ + unsigned long cfg_table_pa; + unsigned int cfg_table_len; + int ret; + + ret = efi_get_conf_table(bp, &cfg_table_pa, &cfg_table_len); + if (ret) + return NULL; + + return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, cfg_table_pa, + cfg_table_len, + EFI_CC_BLOB_GUID); +} + +/* + * Initial set up of SNP relies on information provided by the + * Confidential Computing blob, which can be passed to the boot kernel + * by firmware/bootloader in the following ways: + * + * - via an entry in the EFI config table + * - via a setup_data structure, as defined by the Linux Boot Protocol + * + * Scan for the blob in that order. + */ +static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) +{ + struct cc_blob_sev_info *cc_info; + + cc_info = find_cc_blob_efi(bp); + if (cc_info) + goto found_cc_info; + + cc_info = find_cc_blob_setup_data(bp); + if (!cc_info) + return NULL; + +found_cc_info: + if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + + return cc_info; +} + +/* + * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks + * will verify the SNP CPUID/MSR bits. + */ +static bool early_snp_init(struct boot_params *bp) +{ + struct cc_blob_sev_info *cc_info; + + if (!bp) + return false; + + cc_info = find_cc_blob(bp); + if (!cc_info) + return false; + + /* + * If a SNP-specific Confidential Computing blob is present, then + * firmware/bootloader have indicated SNP support. Verifying this + * involves CPUID checks which will be more reliable if the SNP + * CPUID table is used. See comments over snp_setup_cpuid_table() for + * more details. + */ + setup_cpuid_table(cc_info); + + /* + * Record the SVSM Calling Area (CA) address if the guest is not + * running at VMPL0. The CA will be used to communicate with the + * SVSM and request its services. + */ + svsm_setup_ca(cc_info); + + /* + * Pass run-time kernel a pointer to CC info via boot_params so EFI + * config table doesn't need to be searched again during early startup + * phase. + */ + bp->cc_blob_address = (u32)(unsigned long)cc_info; + + return true; +} + /* * sev_check_cpu_support - Check for SEV support in the CPU capabilities * @@ -455,7 +568,7 @@ void sev_enable(struct boot_params *bp) bp->cc_blob_address = 0; /* - * Do an initial SEV capability check before snp_init() which + * Do an initial SEV capability check before early_snp_init() which * loads the CPUID page and the same checks afterwards are done * without the hypervisor and are trustworthy. * @@ -470,7 +583,7 @@ void sev_enable(struct boot_params *bp) * Setup/preliminary detection of SNP. This will be sanity-checked * against CPUID/MSR values later. */ - snp = snp_init(bp); + snp = early_snp_init(bp); /* Now repeat the checks with the SNP CPUID table. */ @@ -498,10 +611,32 @@ void sev_enable(struct boot_params *bp) * features. */ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { - if (!(get_hv_features() & GHCB_HV_FT_SNP)) + u64 hv_features; + int ret; + + hv_features = get_hv_features(); + if (!(hv_features & GHCB_HV_FT_SNP)) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); - enforce_vmpl0(); + /* + * Enforce running at VMPL0 or with an SVSM. + * + * Use RMPADJUST (see the rmpadjust() function for a description of + * what the instruction does) to update the VMPL1 permissions of a + * page. If the guest is running at VMPL0, this will succeed. If the + * guest is running at any other VMPL, this will fail. Linux SNP guests + * only ever run at a single VMPL level so permission mask changes of a + * lesser-privileged VMPL are a don't-care. + */ + ret = rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, 1); + + /* + * Running at VMPL0 is not required if an SVSM is present and the hypervisor + * supports the required SVSM GHCB events. + */ + if (ret && + !(snp_vmpl && (hv_features & GHCB_HV_FT_SNP_MULTI_VMPL))) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0); } if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) @@ -527,85 +662,6 @@ u64 sev_get_status(void) return m.q; } -/* Search for Confidential Computing blob in the EFI config table. */ -static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp) -{ - unsigned long cfg_table_pa; - unsigned int cfg_table_len; - int ret; - - ret = efi_get_conf_table(bp, &cfg_table_pa, &cfg_table_len); - if (ret) - return NULL; - - return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, cfg_table_pa, - cfg_table_len, - EFI_CC_BLOB_GUID); -} - -/* - * Initial set up of SNP relies on information provided by the - * Confidential Computing blob, which can be passed to the boot kernel - * by firmware/bootloader in the following ways: - * - * - via an entry in the EFI config table - * - via a setup_data structure, as defined by the Linux Boot Protocol - * - * Scan for the blob in that order. - */ -static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) -{ - struct cc_blob_sev_info *cc_info; - - cc_info = find_cc_blob_efi(bp); - if (cc_info) - goto found_cc_info; - - cc_info = find_cc_blob_setup_data(bp); - if (!cc_info) - return NULL; - -found_cc_info: - if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); - - return cc_info; -} - -/* - * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks - * will verify the SNP CPUID/MSR bits. - */ -bool snp_init(struct boot_params *bp) -{ - struct cc_blob_sev_info *cc_info; - - if (!bp) - return false; - - cc_info = find_cc_blob(bp); - if (!cc_info) - return false; - - /* - * If a SNP-specific Confidential Computing blob is present, then - * firmware/bootloader have indicated SNP support. Verifying this - * involves CPUID checks which will be more reliable if the SNP - * CPUID table is used. See comments over snp_setup_cpuid_table() for - * more details. - */ - setup_cpuid_table(cc_info); - - /* - * Pass run-time kernel a pointer to CC info via boot_params so EFI - * config table doesn't need to be searched again during early startup - * phase. - */ - bp->cc_blob_address = (u32)(unsigned long)cc_info; - - return true; -} - void sev_prep_identity_maps(unsigned long top_level_pgt) { /* |