diff options
Diffstat (limited to 'arch/x86/boot/compressed/misc.c')
| -rw-r--r-- | arch/x86/boot/compressed/misc.c | 503 |
1 files changed, 335 insertions, 168 deletions
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 0319c88290a5..0f41ca0e52c0 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -1,8 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 /* * misc.c * - * This is a collection of several routines from gzip-1.0.3 - * adapted for Linux. + * This is a collection of several routines used to extract the kernel + * which includes KASLR relocation, decompression, ELF parsing, and + * relocation processing. Additionally included are the screen and serial + * output functions and related debugging support functions. * * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 * puts by Nick Holloway 1993, better puts by Martin Mares 1995 @@ -10,120 +13,52 @@ */ #include "misc.h" - -/* WARNING!! - * This code is compiled with -fPIC and it is relocated dynamically - * at run time, but no relocation processing is performed. - * This means that it is not safe to place pointers in static structures. - */ +#include "error.h" +#include "../string.h" +#include "../voffset.h" +#include <asm/bootparam_utils.h> /* - * Getting to provable safe in place decompression is hard. - * Worst case behaviours need to be analyzed. - * Background information: - * - * The file layout is: - * magic[2] - * method[1] - * flags[1] - * timestamp[4] - * extraflags[1] - * os[1] - * compressed data blocks[N] - * crc[4] orig_len[4] - * - * resulting in 18 bytes of non compressed data overhead. - * - * Files divided into blocks - * 1 bit (last block flag) - * 2 bits (block type) - * - * 1 block occurs every 32K -1 bytes or when there 50% compression - * has been achieved. The smallest block type encoding is always used. - * - * stored: - * 32 bits length in bytes. - * - * fixed: - * magic fixed tree. - * symbols. - * - * dynamic: - * dynamic tree encoding. - * symbols. - * - * - * The buffer for decompression in place is the length of the - * uncompressed data, plus a small amount extra to keep the algorithm safe. - * The compressed data is placed at the end of the buffer. The output - * pointer is placed at the start of the buffer and the input pointer - * is placed where the compressed data starts. Problems will occur - * when the output pointer overruns the input pointer. - * - * The output pointer can only overrun the input pointer if the input - * pointer is moving faster than the output pointer. A condition only - * triggered by data whose compressed form is larger than the uncompressed - * form. - * - * The worst case at the block level is a growth of the compressed data - * of 5 bytes per 32767 bytes. - * - * The worst case internal to a compressed block is very hard to figure. - * The worst case can at least be boundined by having one bit that represents - * 32764 bytes and then all of the rest of the bytes representing the very - * very last byte. - * - * All of which is enough to compute an amount of extra data that is required - * to be safe. To avoid problems at the block level allocating 5 extra bytes - * per 32767 bytes of data is sufficient. To avoind problems internal to a - * block adding an extra 32767 bytes (the worst case uncompressed block size) - * is sufficient, to ensure that in the worst case the decompressed data for - * block will stop the byte before the compressed data for a block begins. - * To avoid problems with the compressed data's meta information an extra 18 - * bytes are needed. Leading to the formula: - * - * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size. - * - * Adding 8 bytes per 32K is a bit excessive but much easier to calculate. - * Adding 32768 instead of 32767 just makes for round numbers. - * Adding the decompressor_size is necessary as it musht live after all - * of the data as well. Last I measured the decompressor is about 14K. - * 10K of actual data and 4K of bss. - * + * WARNING!! + * This code is compiled with -fPIC and it is relocated dynamically at + * run time, but no relocation processing is performed. This means that + * it is not safe to place pointers in static structures. */ -/* - * gzip declarations - */ +/* Macros used by the included decompressor code below. */ #define STATIC static +/* Define an externally visible malloc()/free(). */ +#define MALLOC_VISIBLE +#include <linux/decompress/mm.h> -#undef memset -#undef memcpy +/* + * Provide definitions of memzero and memmove as some of the decompressors will + * try to define their own functions if these are not defined as macros. + */ #define memzero(s, n) memset((s), 0, (n)) - - -static void error(char *m); +#ifndef memmove +#define memmove memmove +/* Functions used by the included decompressor code below. */ +void *memmove(void *dest, const void *src, size_t n); +#endif /* * This is set up by the setup-routine at boot-time */ -struct boot_params *real_mode; /* Pointer to real-mode data */ +struct boot_params *boot_params_ptr; -void *memset(void *s, int c, size_t n); -void *memcpy(void *dest, const void *src, size_t n); +struct port_io_ops pio_ops; -#ifdef CONFIG_X86_64 -#define memptr long -#else -#define memptr unsigned -#endif - -static memptr free_mem_ptr; -static memptr free_mem_end_ptr; +memptr free_mem_ptr; +memptr free_mem_end_ptr; +int spurious_nmi_count; static char *vidmem; static int vidport; -static int lines, cols; + +/* These might be accessed before .bss is cleared, so use .data instead. */ +static int lines __section(".data"); +static int cols __section(".data"); #ifdef CONFIG_KERNEL_GZIP #include "../../../../lib/decompress_inflate.c" @@ -149,11 +84,19 @@ static int lines, cols; #include "../../../../lib/decompress_unlz4.c" #endif +#ifdef CONFIG_KERNEL_ZSTD +#include "../../../../lib/decompress_unzstd.c" +#endif +/* + * NOTE: When adding a new decompressor, please update the analysis in + * ../header.S. + */ + static void scroll(void) { int i; - memcpy(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2); + memmove(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2); for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2) vidmem[i] = ' '; } @@ -186,12 +129,11 @@ void __putstr(const char *s) } } - if (real_mode->screen_info.orig_video_mode == 0 && - lines == 0 && cols == 0) + if (lines == 0 || cols == 0) return; - x = real_mode->screen_info.orig_x; - y = real_mode->screen_info.orig_y; + x = boot_params_ptr->screen_info.orig_x; + y = boot_params_ptr->screen_info.orig_y; while ((c = *s++) != '\0') { if (c == '\n') { @@ -212,8 +154,8 @@ void __putstr(const char *s) } } - real_mode->screen_info.orig_x = x; - real_mode->screen_info.orig_y = y; + boot_params_ptr->screen_info.orig_x = x; + boot_params_ptr->screen_info.orig_y = y; pos = (x + cols * y) * 2; /* Update cursor position */ outb(14, vidport); @@ -222,56 +164,121 @@ void __putstr(const char *s) outb(0xff & (pos >> 1), vidport+1); } -void *memset(void *s, int c, size_t n) +static noinline void __putnum(unsigned long value, unsigned int base, + int mindig) { - int i; - char *ss = s; + char buf[8*sizeof(value)+1]; + char *p; + + p = buf + sizeof(buf); + *--p = '\0'; + + while (mindig-- > 0 || value) { + unsigned char digit = value % base; + digit += (digit >= 10) ? ('a'-10) : '0'; + *--p = digit; - for (i = 0; i < n; i++) - ss[i] = c; - return s; + value /= base; + } + + __putstr(p); } -#ifdef CONFIG_X86_32 -void *memcpy(void *dest, const void *src, size_t n) + +void __puthex(unsigned long value) { - int d0, d1, d2; - asm volatile( - "rep ; movsl\n\t" - "movl %4,%%ecx\n\t" - "rep ; movsb\n\t" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - : "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src) - : "memory"); - - return dest; + __putnum(value, 16, sizeof(value)*2); } -#else -void *memcpy(void *dest, const void *src, size_t n) + +void __putdec(unsigned long value) { - long d0, d1, d2; - asm volatile( - "rep ; movsq\n\t" - "movq %4,%%rcx\n\t" - "rep ; movsb\n\t" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - : "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src) - : "memory"); - - return dest; + __putnum(value, 10, 1); } -#endif -static void error(char *x) +#ifdef CONFIG_X86_NEED_RELOCS +static void handle_relocations(void *output, unsigned long output_len, + unsigned long virt_addr) { - error_putstr("\n\n"); - error_putstr(x); - error_putstr("\n\n -- System halted"); + int *reloc; + unsigned long delta, map, ptr; + unsigned long min_addr = (unsigned long)output; + unsigned long max_addr = min_addr + (VO___bss_start - VO__text); + + /* + * Calculate the delta between where vmlinux was linked to load + * and where it was actually loaded. + */ + delta = min_addr - LOAD_PHYSICAL_ADDR; + + /* + * The kernel contains a table of relocation addresses. Those + * addresses have the final load address of the kernel in virtual + * memory. We are currently working in the self map. So we need to + * create an adjustment for kernel memory addresses to the self map. + * This will involve subtracting out the base address of the kernel. + */ + map = delta - __START_KERNEL_map; + + /* + * 32-bit always performs relocations. 64-bit relocations are only + * needed if KASLR has chosen a different starting address offset + * from __START_KERNEL_map. + */ + if (IS_ENABLED(CONFIG_X86_64)) + delta = virt_addr - LOAD_PHYSICAL_ADDR; + + if (!delta) { + debug_putstr("No relocation needed... "); + return; + } + debug_putstr("Performing relocations... "); + + /* + * Process relocations: 32 bit relocations first then 64 bit after. + * Two sets of binary relocations are added to the end of the kernel + * before compression. Each relocation table entry is the kernel + * address of the location which needs to be updated stored as a + * 32-bit value which is sign extended to 64 bits. + * + * Format is: + * + * kernel bits... + * 0 - zero terminator for 64 bit relocations + * 64 bit relocation repeated + * 0 - zero terminator for 32 bit relocations + * 32 bit relocation repeated + * + * So we work backwards from the end of the decompressed image. + */ + for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) { + long extended = *reloc; + extended += map; + + ptr = (unsigned long)extended; + if (ptr < min_addr || ptr > max_addr) + error("32-bit relocation outside of kernel!\n"); + + *(uint32_t *)ptr += delta; + } +#ifdef CONFIG_X86_64 + for (reloc--; *reloc; reloc--) { + long extended = *reloc; + extended += map; - while (1) - asm("hlt"); + ptr = (unsigned long)extended; + if (ptr < min_addr || ptr > max_addr) + error("64-bit relocation outside of kernel!\n"); + + *(uint64_t *)ptr += delta; + } +#endif } +#else +static inline void handle_relocations(void *output, unsigned long output_len, + unsigned long virt_addr) +{ } +#endif -static void parse_elf(void *output) +static size_t parse_elf(void *output) { #ifdef CONFIG_X86_64 Elf64_Ehdr ehdr; @@ -287,10 +294,8 @@ static void parse_elf(void *output) if (ehdr.e_ident[EI_MAG0] != ELFMAG0 || ehdr.e_ident[EI_MAG1] != ELFMAG1 || ehdr.e_ident[EI_MAG2] != ELFMAG2 || - ehdr.e_ident[EI_MAG3] != ELFMAG3) { + ehdr.e_ident[EI_MAG3] != ELFMAG3) error("Kernel is not a valid ELF file"); - return; - } debug_putstr("Parsing ELF... "); @@ -305,33 +310,118 @@ static void parse_elf(void *output) switch (phdr->p_type) { case PT_LOAD: +#ifdef CONFIG_X86_64 + if ((phdr->p_align % 0x200000) != 0) + error("Alignment of LOAD segment isn't multiple of 2MB"); +#endif #ifdef CONFIG_RELOCATABLE dest = output; dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); #else dest = (void *)(phdr->p_paddr); #endif - memcpy(dest, - output + phdr->p_offset, - phdr->p_filesz); + memmove(dest, output + phdr->p_offset, phdr->p_filesz); break; default: /* Ignore other PT_* */ break; } } free(phdrs); + + return ehdr.e_entry - LOAD_PHYSICAL_ADDR; +} + +const unsigned long kernel_text_size = VO___start_rodata - VO__text; +const unsigned long kernel_inittext_offset = VO__sinittext - VO__text; +const unsigned long kernel_inittext_size = VO___inittext_end - VO__sinittext; +const unsigned long kernel_total_size = VO__end - VO__text; + +static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); + +extern unsigned char input_data[]; +extern unsigned int input_len, output_len; + +unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + void (*error)(char *x)) +{ + unsigned long entry; + + if (!free_mem_ptr) { + free_mem_ptr = (unsigned long)boot_heap; + free_mem_end_ptr = (unsigned long)boot_heap + sizeof(boot_heap); + } + + if (__decompress(input_data, input_len, NULL, NULL, outbuf, output_len, + NULL, error) < 0) + return ULONG_MAX; + + entry = parse_elf(outbuf); + handle_relocations(outbuf, output_len, virt_addr); + + return entry; } -asmlinkage void decompress_kernel(void *rmode, memptr heap, - unsigned char *input_data, - unsigned long input_len, - unsigned char *output) +/* + * Set the memory encryption xloadflag based on the mem_encrypt= command line + * parameter, if provided. + */ +static void parse_mem_encrypt(struct setup_header *hdr) +{ + int on = cmdline_find_option_bool("mem_encrypt=on"); + int off = cmdline_find_option_bool("mem_encrypt=off"); + + if (on > off) + hdr->xloadflags |= XLF_MEM_ENCRYPTION; +} + +static void early_sev_detect(void) +{ + /* + * Accessing video memory causes guest termination because + * the boot stage2 #VC handler of SEV-ES/SNP guests does not + * support MMIO handling and kexec -c adds screen_info to the + * boot parameters passed to the kexec kernel, which causes + * console output to be dumped to both video and serial. + */ + if (sev_status & MSR_AMD64_SEV_ES_ENABLED) + lines = cols = 0; +} + +/* + * The compressed kernel image (ZO), has been moved so that its position + * is against the end of the buffer used to hold the uncompressed kernel + * image (VO) and the execution environment (.bss, .brk), which makes sure + * there is room to do the in-place decompression. (See header.S for the + * calculations.) + * + * |-----compressed kernel image------| + * V V + * 0 extract_offset +INIT_SIZE + * |-----------|---------------|-------------------------|--------| + * | | | | + * VO__text startup_32 of ZO VO__end ZO__end + * ^ ^ + * |-------uncompressed kernel image---------| + * + */ +asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) { - real_mode = rmode; + unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + memptr heap = (memptr)boot_heap; + unsigned long needed_size; + size_t entry_offset; + + /* Retain x86 boot parameters pointer passed from startup_32/64. */ + boot_params_ptr = rmode; + + /* Clear flags intended for solely in-kernel use. */ + boot_params_ptr->hdr.loadflags &= ~KASLR_FLAG; - sanitize_boot_params(real_mode); + parse_mem_encrypt(&boot_params_ptr->hdr); - if (real_mode->screen_info.orig_video_mode == 7) { + sanitize_boot_params(boot_params_ptr); + + if (boot_params_ptr->screen_info.orig_video_mode == 7) { vidmem = (char *) 0xb0000; vidport = 0x3b4; } else { @@ -339,32 +429,109 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, vidport = 0x3d4; } - lines = real_mode->screen_info.orig_video_lines; - cols = real_mode->screen_info.orig_video_cols; + lines = boot_params_ptr->screen_info.orig_video_lines; + cols = boot_params_ptr->screen_info.orig_video_cols; + + init_default_io_ops(); + + /* + * Detect TDX guest environment. + * + * It has to be done before console_init() in order to use + * paravirtualized port I/O operations if needed. + */ + early_tdx_detect(); + + early_sev_detect(); console_init(); - debug_putstr("early console in decompress_kernel\n"); + + /* + * Save RSDP address for later use. Have this after console_init() + * so that early debugging output from the RSDP parsing code can be + * collected. + */ + boot_params_ptr->acpi_rsdp_addr = get_rsdp_addr(); + + debug_putstr("early console in extract_kernel\n"); free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; + /* + * The memory hole needed for the kernel is the larger of either + * the entire decompressed kernel plus relocation table, or the + * entire decompressed kernel plus .bss and .brk sections. + * + * On X86_64, the memory is mapped with PMD pages. Round the + * size up so that the full extent of PMD pages mapped is + * included in the check against the valid memory table + * entries. This ensures the full mapped area is usable RAM + * and doesn't include any reserved areas. + */ + needed_size = max_t(unsigned long, output_len, kernel_total_size); +#ifdef CONFIG_X86_64 + needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN); +#endif + + /* Report initial kernel position details. */ + debug_putaddr(input_data); + debug_putaddr(input_len); + debug_putaddr(output); + debug_putaddr(output_len); + debug_putaddr(kernel_total_size); + debug_putaddr(needed_size); + +#ifdef CONFIG_X86_64 + /* Report address of 32-bit trampoline */ + debug_putaddr(trampoline_32bit); +#endif + + choose_random_location((unsigned long)input_data, input_len, + (unsigned long *)&output, + needed_size, + &virt_addr); + + /* Validate memory location choices. */ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) - error("Destination address inappropriately aligned"); + error("Destination physical address inappropriately aligned"); + if (virt_addr & (MIN_KERNEL_ALIGN - 1)) + error("Destination virtual address inappropriately aligned"); #ifdef CONFIG_X86_64 if (heap > 0x3fffffffffffUL) error("Destination address too large"); + if (virt_addr + needed_size > KERNEL_IMAGE_SIZE) + error("Destination virtual address is beyond the kernel mapping area"); #else if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) error("Destination address too large"); #endif #ifndef CONFIG_RELOCATABLE - if ((unsigned long)output != LOAD_PHYSICAL_ADDR) - error("Wrong destination address"); + if (virt_addr != LOAD_PHYSICAL_ADDR) + error("Destination virtual address changed when not relocatable"); #endif debug_putstr("\nDecompressing Linux... "); - decompress(input_data, input_len, NULL, NULL, output, NULL, error); - parse_elf(output); - debug_putstr("done.\nBooting the kernel.\n"); - return; + + if (init_unaccepted_memory()) { + debug_putstr("Accepting memory... "); + accept_memory(__pa(output), needed_size); + } + + entry_offset = decompress_kernel(output, virt_addr, error); + + debug_putstr("done.\nBooting the kernel (entry_offset: 0x"); + debug_puthex(entry_offset); + debug_putstr(").\n"); + + /* Disable exception handling before booting the kernel */ + cleanup_exception_handling(); + + if (spurious_nmi_count) { + error_putstr("Spurious early NMIs ignored: "); + error_putdec(spurious_nmi_count); + error_putstr("\n"); + } + + return output + entry_offset; } |
