Diffstat (limited to 'arch/s390/boot')
34 files changed, 3166 insertions, 1383 deletions
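Several hunks below (boot.h and ipl_report.c) hinge on a half-open range overlap predicate, intersects(): two ranges [addr0, addr0 + size0) and [addr1, addr1 + size1) overlap exactly when each one starts before the other ends. As a reader aid, here is a minimal standalone C sketch of that check; the test harness and its example addresses are illustrative and not part of the patch.

	#include <assert.h>
	#include <stdbool.h>

	/* Same shape as the inline helper the patch adds to boot.h:
	 * true iff each half-open range starts before the other ends. */
	static bool intersects(unsigned long addr0, unsigned long size0,
			       unsigned long addr1, unsigned long size1)
	{
		return addr0 + size0 > addr1 && addr1 + size1 > addr0;
	}

	int main(void)
	{
		assert(intersects(0x1000, 0x200, 0x1100, 0x100));  /* contained range overlaps */
		assert(!intersects(0x1000, 0x100, 0x1100, 0x100)); /* merely adjacent ranges do not */
		return 0;
	}

This is the test that ipl_report_certs_intersects() applies below when checking an allocation against the certificate ranges of the IPL report.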
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore index 16ff906e4610..af2a6a7bc028 100644 --- a/arch/s390/boot/.gitignore +++ b/arch/s390/boot/.gitignore @@ -1,3 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only image bzImage +relocs.S section_cmp.* +vmlinux +vmlinux.lds +vmlinux.map +vmlinux.syms diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index e2c47d3a1c89..bee49626be4b 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -3,53 +3,53 @@ # Makefile for the linux s390-specific parts of the memory manager. # +# Tooling runtimes are unavailable and cannot be linked for early boot code KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n KASAN_SANITIZE := n - -KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) -KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) +KCSAN_SANITIZE := n +KMSAN_SANITIZE := n # -# Use minimum architecture for als.c to be able to print an error +# Use minimum architecture level so it is possible to print an error # message if the kernel is started on a machine which is too old # -ifndef CONFIG_CC_IS_CLANG -CC_FLAGS_MARCH_MINIMUM := -march=z900 -else CC_FLAGS_MARCH_MINIMUM := -march=z10 -endif - -ifneq ($(CC_FLAGS_MARCH),$(CC_FLAGS_MARCH_MINIMUM)) -AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) -AFLAGS_head.o += $(CC_FLAGS_MARCH_MINIMUM) -AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH) -AFLAGS_mem.o += $(CC_FLAGS_MARCH_MINIMUM) -CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) -CFLAGS_als.o += $(CC_FLAGS_MARCH_MINIMUM) -CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) -CFLAGS_sclp_early_core.o += $(CC_FLAGS_MARCH_MINIMUM) -endif + +KBUILD_AFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_AFLAGS_DECOMPRESSOR)) +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_CFLAGS_DECOMPRESSOR)) +KBUILD_AFLAGS += $(CC_FLAGS_MARCH_MINIMUM) +KBUILD_CFLAGS += $(CC_FLAGS_MARCH_MINIMUM) CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char -obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o +obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o text_dma.o -obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o -obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o +obj-y += version.o pgm_check.o ctype.o ipl_data.o relocs.o alternative.o +obj-y += uv.o printk.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o -targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y) -subdir- := compressed +obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o +obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o +obj-$(CONFIG_KMSAN) += kmsan.o +obj-all := $(obj-y) piggy.o syms.o + +targets := bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y) +targets += vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 +targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 +targets += vmlinux.bin.zst info.bin syms.bin vmlinux.syms $(obj-all) +targets += relocs.S OBJECTS := $(addprefix $(obj)/,$(obj-y)) +OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all)) + +clean-files += vmlinux.map quiet_cmd_section_cmp = SECTCMP $* define cmd_section_cmp - s1=`$(OBJDUMP) -t -j "$*" "$<" | sort | \ + s1=`$(OBJDUMP) -t "$<" | grep "\s$*\s\+" | sort | \ sed -n "/0000000000000000/! s/.*\s$*\s\+//p" | sha256sum`; \ - s2=`$(OBJDUMP) -t -j "$*" "$(word 2,$^)" | sort | \ + s2=`$(OBJDUMP) -t "$(word 2,$^)" | grep "\s$*\s\+" | sort | \ sed -n "/0000000000000000/! 
s/.*\s$*\s\+//p" | sha256sum`; \ if [ "$$s1" != "$$s2" ]; then \ echo "error: section $* differs between $< and $(word 2,$^)" >&2; \ @@ -58,22 +58,72 @@ define cmd_section_cmp touch $@ endef -$(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE +$(obj)/bzImage: $(obj)/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE $(call if_changed,objcopy) -$(obj)/section_cmp%: vmlinux $(obj)/compressed/vmlinux FORCE +$(obj)/section_cmp%: vmlinux $(obj)/vmlinux FORCE $(call if_changed,section_cmp) -$(obj)/compressed/vmlinux: $(obj)/startup.a FORCE - $(Q)$(MAKE) $(build)=$(obj)/compressed $@ +LDFLAGS_vmlinux-$(CONFIG_LD_ORPHAN_WARN) := --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) +LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) --oformat $(LD_BFD) -e startup $(if $(CONFIG_VMLINUX_MAP),-Map=$(obj)/vmlinux.map) --build-id=sha1 -T +$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS_ALL) FORCE + $(call if_changed,ld) + +LDFLAGS_vmlinux.syms := $(LDFLAGS_vmlinux-y) --oformat $(LD_BFD) -e startup -T +$(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(OBJECTS) FORCE + $(call if_changed,ld) + +quiet_cmd_dumpsyms = DUMPSYMS $< +define cmd_dumpsyms + $(NM) -n -S --format=bsd "$<" | sed -nE 's/^0*([0-9a-fA-F]+) 0*([0-9a-fA-F]+) [tT] ([^ ]*)$$/\1 \2 \3/p' | tr '\n' '\0' > "$@" +endef + +$(obj)/syms.bin: $(obj)/vmlinux.syms FORCE + $(call if_changed,dumpsyms) + +OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.decompressor.syms +$(obj)/syms.o: $(obj)/syms.bin FORCE + $(call if_changed,objcopy) -$(obj)/startup.a: $(OBJECTS) FORCE - $(call if_changed,ar) +OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=alloc,load +$(obj)/info.bin: vmlinux FORCE + $(call if_changed,objcopy) -install: $(CONFIGURE) $(obj)/bzImage - sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ - System.map "$(INSTALL_PATH)" +OBJCOPYFLAGS_info.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.info +$(obj)/info.o: $(obj)/info.bin FORCE + $(call if_changed,objcopy) + +OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section=.vmlinux.info -S +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) -chkbss := $(obj-y) -chkbss-target := startup.a -include $(srctree)/arch/s390/scripts/Makefile.chkbss +# relocs.S is created by the vmlinux postlink step. 
+$(obj)/relocs.S: vmlinux + @true + +suffix-$(CONFIG_KERNEL_GZIP) := .gz +suffix-$(CONFIG_KERNEL_BZIP2) := .bz2 +suffix-$(CONFIG_KERNEL_LZ4) := .lz4 +suffix-$(CONFIG_KERNEL_LZMA) := .lzma +suffix-$(CONFIG_KERNEL_LZO) := .lzo +suffix-$(CONFIG_KERNEL_XZ) := .xz +suffix-$(CONFIG_KERNEL_ZSTD) := .zst + +$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) +$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE + $(call if_changed,bzip2_with_size) +$(obj)/vmlinux.bin.lz4: $(obj)/vmlinux.bin FORCE + $(call if_changed,lz4_with_size) +$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE + $(call if_changed,lzma_with_size) +$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE + $(call if_changed,lzo_with_size) +$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE + $(call if_changed,xzkern_with_size) +$(obj)/vmlinux.bin.zst: $(obj)/vmlinux.bin FORCE + $(call if_changed,zstd22_with_size) + +OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed +$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE + $(call if_changed,objcopy) diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c index ff6801d401c4..79afb5fa7f1f 100644 --- a/arch/s390/boot/als.c +++ b/arch/s390/boot/als.c @@ -9,42 +9,8 @@ #include <asm/sclp.h> #include "boot.h" -/* - * The code within this file will be called very early. It may _not_ - * access anything within the bss section, since that is not cleared - * yet and may contain data (e.g. initrd) that must be saved by other - * code. - * For temporary objects the stack (16k) should be used. - */ - static unsigned long als[] = { FACILITIES_ALS }; -static void u16_to_hex(char *str, u16 val) -{ - int i, num; - - for (i = 1; i <= 4; i++) { - num = (val >> (16 - 4 * i)) & 0xf; - if (num >= 10) - num += 7; - *str++ = '0' + num; - } - *str = '\0'; -} - -static void print_machine_type(void) -{ - static char mach_str[80] = "Detected machine-type number: "; - char type_str[5]; - struct cpuid id; - - get_cpu_id(&id); - u16_to_hex(type_str, id.machine); - strcat(mach_str, type_str); - strcat(mach_str, "\n"); - sclp_early_printk(mach_str); -} - static void u16_to_decimal(char *str, u16 val) { int div = 1; @@ -68,7 +34,7 @@ void print_missing_facilities(void) first = 1; for (i = 0; i < ARRAY_SIZE(als); i++) { - val = ~S390_lowcore.stfle_fac_list[i] & als[i]; + val = ~stfle_fac_list[i] & als[i]; for (j = 0; j < BITS_PER_LONG; j++) { if (!(val & (1UL << (BITS_PER_LONG - 1 - j)))) continue; @@ -80,8 +46,7 @@ void print_missing_facilities(void) * z/VM adds a four character prefix. 
*/ if (strlen(als_str) > 70) { - strcat(als_str, "\n"); - sclp_early_printk(als_str); + boot_emerg("%s\n", als_str); *als_str = '\0'; } u16_to_decimal(val_str, i * BITS_PER_LONG + j); @@ -89,16 +54,18 @@ void print_missing_facilities(void) first = 0; } } - strcat(als_str, "\n"); - sclp_early_printk(als_str); + boot_emerg("%s\n", als_str); } static void facility_mismatch(void) { - sclp_early_printk("The Linux kernel requires more recent processor hardware\n"); - print_machine_type(); + struct cpuid id; + + get_cpu_id(&id); + boot_emerg("The Linux kernel requires more recent processor hardware\n"); + boot_emerg("Detected machine-type number: %4x\n", id.machine); print_missing_facilities(); - sclp_early_printk("See Principles of Operations for facility bits\n"); + boot_emerg("See Principles of Operations for facility bits\n"); disabled_wait(); } @@ -106,9 +73,9 @@ void verify_facilities(void) { int i; - __stfle(S390_lowcore.stfle_fac_list, ARRAY_SIZE(S390_lowcore.stfle_fac_list)); + __stfle(stfle_fac_list, ARRAY_SIZE(stfle_fac_list)); for (i = 0; i < ARRAY_SIZE(als); i++) { - if ((S390_lowcore.stfle_fac_list[i] & als[i]) != als[i]) + if ((stfle_fac_list[i] & als[i]) != als[i]) facility_mismatch(); } } diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c new file mode 100644 index 000000000000..19ea7934b918 --- /dev/null +++ b/arch/s390/boot/alternative.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "alt: " fmt +#include "boot.h" + +#define a_debug boot_debug + +#include "../kernel/alternative.c" + +static void alt_debug_all(int type) +{ + int i; + + switch (type) { + case ALT_TYPE_FACILITY: + for (i = 0; i < ARRAY_SIZE(alt_debug.facilities); i++) + alt_debug.facilities[i] = -1UL; + break; + case ALT_TYPE_FEATURE: + for (i = 0; i < ARRAY_SIZE(alt_debug.mfeatures); i++) + alt_debug.mfeatures[i] = -1UL; + break; + case ALT_TYPE_SPEC: + alt_debug.spec = 1; + break; + } +} + +static void alt_debug_modify(int type, unsigned int nr, bool clear) +{ + switch (type) { + case ALT_TYPE_FACILITY: + if (clear) + __clear_facility(nr, alt_debug.facilities); + else + __set_facility(nr, alt_debug.facilities); + break; + case ALT_TYPE_FEATURE: + if (clear) + __clear_machine_feature(nr, alt_debug.mfeatures); + else + __set_machine_feature(nr, alt_debug.mfeatures); + break; + } +} + +static char *alt_debug_parse(int type, char *str) +{ + unsigned long val, endval; + char *endp; + bool clear; + int i; + + if (*str == ':') { + str++; + } else { + alt_debug_all(type); + return str; + } + clear = false; + if (*str == '!') { + alt_debug_all(type); + clear = true; + str++; + } + while (*str) { + val = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + if (*str == '-') { + str++; + endval = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + while (val <= endval) { + alt_debug_modify(type, val, clear); + val++; + } + } else { + alt_debug_modify(type, val, clear); + } + if (*str != ',') + break; + str++; + } + return str; +} + +/* + * Use debug-alternative command line parameter for debugging: + * "debug-alternative" + * -> print debug message for every single alternative + * + * "debug-alternative=0;2" + * -> print debug message for all alternatives with type 0 and 2 + * + * "debug-alternative=0:0-7" + * -> print debug message for all alternatives with type 0 and with + * facility numbers within the range of 0-7 + * (if type 0 is ALT_TYPE_FACILITY) + * + * "debug-alternative=0:!8;1" + * -> print debug message for all 
alternatives with type 0, for all + * facility number, except facility 8, and in addition print all + * alternatives with type 1 + */ +void alt_debug_setup(char *str) +{ + unsigned long type; + char *endp; + int i; + + if (!str) { + alt_debug_all(ALT_TYPE_FACILITY); + alt_debug_all(ALT_TYPE_FEATURE); + alt_debug_all(ALT_TYPE_SPEC); + return; + } + while (*str) { + type = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + switch (type) { + case ALT_TYPE_FACILITY: + case ALT_TYPE_FEATURE: + str = alt_debug_parse(type, str); + break; + case ALT_TYPE_SPEC: + alt_debug_all(ALT_TYPE_SPEC); + break; + } + if (*str != ';') + break; + str++; + } +} diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 2ea603f70c3b..e045cae6e80a 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -2,20 +2,124 @@ #ifndef BOOT_BOOT_H #define BOOT_BOOT_H +#include <linux/types.h> + +#define IPL_START 0x200 + +#ifndef __ASSEMBLY__ + +#include <linux/printk.h> +#include <asm/physmem_info.h> + +struct vmlinux_info { + unsigned long entry; + unsigned long image_size; /* does not include .bss */ + unsigned long bss_size; /* uncompressed image .bss size */ + unsigned long bootdata_off; + unsigned long bootdata_size; + unsigned long bootdata_preserved_off; + unsigned long bootdata_preserved_size; + unsigned long got_start; + unsigned long got_end; + unsigned long amode31_size; + unsigned long init_mm_off; + unsigned long swapper_pg_dir_off; + unsigned long invalid_pg_dir_off; + unsigned long alt_instructions; + unsigned long alt_instructions_end; +#ifdef CONFIG_KASAN + unsigned long kasan_early_shadow_page_off; + unsigned long kasan_early_shadow_pte_off; + unsigned long kasan_early_shadow_pmd_off; + unsigned long kasan_early_shadow_pud_off; + unsigned long kasan_early_shadow_p4d_off; +#endif +}; + void startup_kernel(void); -void detect_memory(void); +unsigned long detect_max_physmem_end(void); +void detect_physmem_online_ranges(unsigned long max_physmem_end); +void physmem_set_usable_limit(unsigned long limit); +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size); +void physmem_free(enum reserved_range_type type); +/* for continuous/multiple allocations per type */ +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align); +unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, + unsigned long align, bool die_on_oom); +/* for single allocations, 1 per type */ +unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, + unsigned long align, unsigned long min, unsigned long max, + bool die_on_oom); +unsigned long get_physmem_alloc_pos(void); +void dump_physmem_reserved(void); +bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start); +bool is_ipl_block_dump(void); void store_ipl_parmblock(void); +int read_ipl_report(void); +void save_ipl_cert_comp_list(void); void setup_boot_command_line(void); void parse_boot_command_line(void); -void setup_memory_end(void); void verify_facilities(void); void print_missing_facilities(void); -void print_pgm_check_info(void); -unsigned long get_random_base(unsigned long safe_addr); +void sclp_early_setup_buffer(void); +void alt_debug_setup(char *str); +void do_pgm_check(struct pt_regs *regs); +unsigned long randomize_within_range(unsigned long size, unsigned long align, + unsigned long min, unsigned long max); +void setup_vmem(unsigned long 
kernel_start, unsigned long kernel_end, unsigned long asce_limit); +int __printf(1, 2) boot_printk(const char *fmt, ...); +void print_stacktrace(unsigned long sp); +void error(char *m); +int get_random(unsigned long limit, unsigned long *value); +void boot_rb_dump(void); -extern int kaslr_enabled; +#ifndef boot_fmt +#define boot_fmt(fmt) fmt +#endif + +#define boot_emerg(fmt, ...) boot_printk(KERN_EMERG boot_fmt(fmt), ##__VA_ARGS__) +#define boot_alert(fmt, ...) boot_printk(KERN_ALERT boot_fmt(fmt), ##__VA_ARGS__) +#define boot_crit(fmt, ...) boot_printk(KERN_CRIT boot_fmt(fmt), ##__VA_ARGS__) +#define boot_err(fmt, ...) boot_printk(KERN_ERR boot_fmt(fmt), ##__VA_ARGS__) +#define boot_warn(fmt, ...) boot_printk(KERN_WARNING boot_fmt(fmt), ##__VA_ARGS__) +#define boot_notice(fmt, ...) boot_printk(KERN_NOTICE boot_fmt(fmt), ##__VA_ARGS__) +#define boot_info(fmt, ...) boot_printk(KERN_INFO boot_fmt(fmt), ##__VA_ARGS__) +#define boot_debug(fmt, ...) boot_printk(KERN_DEBUG boot_fmt(fmt), ##__VA_ARGS__) + +extern struct machine_info machine; +extern int boot_console_loglevel; +extern bool boot_ignore_loglevel; + +/* Symbols defined by linker scripts */ extern const char kernel_version[]; +extern unsigned long memory_limit; +extern unsigned long vmalloc_size; +extern int vmalloc_size_set; +extern char __boot_data_start[], __boot_data_end[]; +extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; +extern char __vmlinux_relocs_64_start[], __vmlinux_relocs_64_end[]; +extern char _decompressor_syms_start[], _decompressor_syms_end[]; +extern char _stack_start[], _stack_end[]; +extern char _end[], _decompressor_end[]; +extern unsigned char _compressed_start[]; +extern unsigned char _compressed_end[]; +extern struct vmlinux_info _vmlinux_info; + +#define vmlinux _vmlinux_info -unsigned long read_ipl_report(unsigned long safe_offset); +#define __lowcore_pa(x) ((unsigned long)(x) % sizeof(struct lowcore)) +#define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore)) +#define __kernel_va(x) ((void *)((unsigned long)(x) - __kaslr_offset_phys + __kaslr_offset)) +#define __kernel_pa(x) ((unsigned long)(x) - __kaslr_offset + __kaslr_offset_phys) +#define __identity_va(x) ((void *)((unsigned long)(x) + __identity_base)) +#define __identity_pa(x) ((unsigned long)(x) - __identity_base) +static inline bool intersects(unsigned long addr0, unsigned long size0, + unsigned long addr1, unsigned long size1) +{ + return addr0 + size0 > addr1 && addr1 + size1 > addr0; +} +#endif /* __ASSEMBLY__ */ #endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/clz_ctz.c b/arch/s390/boot/clz_ctz.c new file mode 100644 index 000000000000..c3ebf248596b --- /dev/null +++ b/arch/s390/boot/clz_ctz.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../../lib/clz_ctz.c" diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore deleted file mode 100644 index e72fcd7ecebb..000000000000 --- a/arch/s390/boot/compressed/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -vmlinux -vmlinux.lds diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile deleted file mode 100644 index fa529c5b4486..000000000000 --- a/arch/s390/boot/compressed/Makefile +++ /dev/null @@ -1,68 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# linux/arch/s390/boot/compressed/Makefile -# -# create a compressed vmlinux image from the original vmlinux -# - -KCOV_INSTRUMENT := n -GCOV_PROFILE := n -UBSAN_SANITIZE := n -KASAN_SANITIZE := n - -obj-y := $(if 
$(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) piggy.o info.o -targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 -targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 -targets += info.bin $(obj-y) - -KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) -KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) -OBJCOPYFLAGS := - -OBJECTS := $(addprefix $(obj)/,$(obj-y)) - -LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T -$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE - $(call if_changed,ld) - -OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load -$(obj)/info.bin: vmlinux FORCE - $(call if_changed,objcopy) - -OBJCOPYFLAGS_info.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.info -$(obj)/info.o: $(obj)/info.bin FORCE - $(call if_changed,objcopy) - -OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section=.vmlinux.info -S -$(obj)/vmlinux.bin: vmlinux FORCE - $(call if_changed,objcopy) - -vmlinux.bin.all-y := $(obj)/vmlinux.bin - -suffix-$(CONFIG_KERNEL_GZIP) := .gz -suffix-$(CONFIG_KERNEL_BZIP2) := .bz2 -suffix-$(CONFIG_KERNEL_LZ4) := .lz4 -suffix-$(CONFIG_KERNEL_LZMA) := .lzma -suffix-$(CONFIG_KERNEL_LZO) := .lzo -suffix-$(CONFIG_KERNEL_XZ) := .xz - -$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE - $(call if_changed,gzip) -$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE - $(call if_changed,bzip2) -$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE - $(call if_changed,lz4) -$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE - $(call if_changed,lzma) -$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE - $(call if_changed,lzo) -$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE - $(call if_changed,xzkern) - -OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed -$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE - $(call if_changed,objcopy) - -chkbss := $(filter-out piggy.o info.o, $(obj-y)) -chkbss-target := vmlinux.bin -include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h deleted file mode 100644 index c15eb7114d83..000000000000 --- a/arch/s390/boot/compressed/decompressor.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef BOOT_COMPRESSED_DECOMPRESSOR_H -#define BOOT_COMPRESSED_DECOMPRESSOR_H - -#ifdef CONFIG_KERNEL_UNCOMPRESSED -static inline void *decompress_kernel(void) {} -#else -void *decompress_kernel(void); -#endif -unsigned long mem_safe_offset(void); -void error(char *m); - -struct vmlinux_info { - unsigned long default_lma; - void (*entry)(void); - unsigned long image_size; /* does not include .bss */ - unsigned long bss_size; /* uncompressed image .bss size */ - unsigned long bootdata_off; - unsigned long bootdata_size; - unsigned long bootdata_preserved_off; - unsigned long bootdata_preserved_size; - unsigned long dynsym_start; - unsigned long rela_dyn_start; - unsigned long rela_dyn_end; -}; - -extern char _vmlinux_info[]; -#define vmlinux (*(struct vmlinux_info *)_vmlinux_info) - -#endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */ diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S deleted file mode 100644 index 44561b2c3712..000000000000 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include 
<asm-generic/vmlinux.lds.h> -#include <asm/vmlinux.lds.h> - -OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") -OUTPUT_ARCH(s390:64-bit) - -ENTRY(startup) - -SECTIONS -{ - . = 0; - .head.text : { - _head = . ; - HEAD_TEXT - _ehead = . ; - } - .text : { - _text = .; /* Text */ - *(.text) - *(.text.*) - _etext = . ; - } - .rodata : { - _rodata = . ; - *(.rodata) /* read-only data */ - *(.rodata.*) - _erodata = . ; - } - .data : { - _data = . ; - *(.data) - *(.data.*) - _edata = . ; - } - /* - * .dma section for code, data, ex_table that need to stay below 2 GB, - * even when the kernel is relocate: above 2 GB. - */ - . = ALIGN(PAGE_SIZE); - _sdma = .; - .dma.text : { - _stext_dma = .; - *(.dma.text) - . = ALIGN(PAGE_SIZE); - _etext_dma = .; - } - . = ALIGN(16); - .dma.ex_table : { - _start_dma_ex_table = .; - KEEP(*(.dma.ex_table)) - _stop_dma_ex_table = .; - } - .dma.data : { *(.dma.data) } - . = ALIGN(PAGE_SIZE); - _edma = .; - - BOOT_DATA - BOOT_DATA_PRESERVED - - /* - * uncompressed image info used by the decompressor it should match - * struct vmlinux_info. It comes from .vmlinux.info section of - * uncompressed vmlinux in a form of info.o - */ - . = ALIGN(8); - .vmlinux.info : { - _vmlinux_info = .; - *(.vmlinux.info) - } - -#ifdef CONFIG_KERNEL_UNCOMPRESSED - . = 0x100000; -#else - . = ALIGN(8); -#endif - .rodata.compressed : { - _compressed_start = .; - *(.vmlinux.bin.compressed) - _compressed_end = .; - FILL(0xff); - . = ALIGN(4096); - } - . = ALIGN(256); - .bss : { - _bss = . ; - *(.bss) - *(.bss.*) - *(COMMON) - . = ALIGN(8); /* For convenience during zeroing */ - _ebss = .; - } - _end = .; - - /* Sections to be discarded */ - /DISCARD/ : { - *(.eh_frame) - *(__ex_table) - *(*__ksymtab*) - *(___kcrctab*) - } -} diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/decompressor.c index 45046630c56a..03500b9d9fb9 100644 --- a/arch/s390/boot/compressed/decompressor.c +++ b/arch/s390/boot/decompressor.c @@ -9,14 +9,15 @@ #include <linux/kernel.h> #include <linux/string.h> +#include <asm/boot_data.h> #include <asm/page.h> #include "decompressor.h" +#include "boot.h" /* * gzip declarations */ #define STATIC static -#define STATIC_RW_DATA static __section(.data) #undef memset #undef memcpy @@ -24,19 +25,16 @@ #define memmove memmove #define memzero(s, n) memset((s), 0, (n)) -/* Symbols defined by linker scripts */ -extern char _end[]; -extern unsigned char _compressed_start[]; -extern unsigned char _compressed_end[]; - -#ifdef CONFIG_HAVE_KERNEL_BZIP2 -#define HEAP_SIZE 0x400000 +#if defined(CONFIG_KERNEL_BZIP2) +#define BOOT_HEAP_SIZE 0x400000 +#elif defined(CONFIG_KERNEL_ZSTD) +#define BOOT_HEAP_SIZE 0x30000 #else -#define HEAP_SIZE 0x10000 +#define BOOT_HEAP_SIZE 0x10000 #endif static unsigned long free_mem_ptr = (unsigned long) _end; -static unsigned long free_mem_end_ptr = (unsigned long) _end + HEAP_SIZE; +static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE; #ifdef CONFIG_KERNEL_GZIP #include "../../../../lib/decompress_inflate.c" @@ -62,24 +60,26 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + HEAP_SIZE; #include "../../../../lib/decompress_unxz.c" #endif -#define decompress_offset ALIGN((unsigned long)_end + HEAP_SIZE, PAGE_SIZE) +#ifdef CONFIG_KERNEL_ZSTD +#include "../../../../lib/decompress_unzstd.c" +#endif -unsigned long mem_safe_offset(void) +static void decompress_error(char *m) { - /* - * due to 4MB HEAD_SIZE for bzip2 - * 'decompress_offset + vmlinux.image_size' could be larger than - * kernel at final 
position + its .bss, so take the larger of two - */ - return max(decompress_offset + vmlinux.image_size, - vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size); + if (bootdebug) + boot_rb_dump(); + boot_emerg("Decompression error: %s\n", m); + boot_emerg(" -- System halted\n"); + disabled_wait(); } -void *decompress_kernel(void) +unsigned long mem_safe_offset(void) { - void *output = (void *)decompress_offset; + return ALIGN(free_mem_end_ptr, PAGE_SIZE); +} +void deploy_kernel(void *output) +{ __decompress(_compressed_start, _compressed_end - _compressed_start, - NULL, NULL, output, 0, NULL, error); - return output; + NULL, NULL, output, vmlinux.image_size, NULL, decompress_error); } diff --git a/arch/s390/boot/decompressor.h b/arch/s390/boot/decompressor.h new file mode 100644 index 000000000000..4f966f06bd65 --- /dev/null +++ b/arch/s390/boot/decompressor.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_COMPRESSED_DECOMPRESSOR_H +#define BOOT_COMPRESSED_DECOMPRESSOR_H + +#ifndef CONFIG_KERNEL_UNCOMPRESSED +unsigned long mem_safe_offset(void); +void deploy_kernel(void *output); +#endif + +#endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */ diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 4b86a8d3c121..0b511d5c030b 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -5,7 +5,6 @@ * Author(s): Hartmut Penner <hp@de.ibm.com> * Martin Schwidefsky <schwidefsky@de.ibm.com> * Rob van der Heij <rvdhei@iae.nl> - * Heiko Carstens <heiko.carstens@de.ibm.com> * * There are 5 different IPL methods * 1) load the image directly into ram at address 0 and do an PSW restart @@ -25,259 +24,201 @@ #include <linux/init.h> #include <linux/linkage.h> #include <asm/asm-offsets.h> -#include <asm/thread_info.h> #include <asm/page.h> #include <asm/ptrace.h> +#include <asm/sclp.h> +#include "boot.h" -#define ARCH_OFFSET 4 +#define EP_OFFSET 0x10008 +#define EP_STRING "S390EP" +#define IPL_BS 0x730 __HEAD - -#define IPL_BS 0x730 - .org 0 - .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded - .long 0x02000018,0x60000050 # by ipl to addresses 0-23. - .long 0x02000068,0x60000050 # (a PSW and two CCWs). - .fill 80-24,1,0x40 # bytes 24-79 are discarded !! - .long 0x020000f0,0x60000050 # The next 160 byte are loaded - .long 0x02000140,0x60000050 # to addresses 0x18-0xb7 - .long 0x02000190,0x60000050 # They form the continuation - .long 0x020001e0,0x60000050 # of the CCW program started - .long 0x02000230,0x60000050 # by ipl and load the range - .long 0x02000280,0x60000050 # 0x0f0-0x730 from the image - .long 0x020002d0,0x60000050 # to the range 0x0f0-0x730 - .long 0x02000320,0x60000050 # in memory. At the end of - .long 0x02000370,0x60000050 # the channel program the PSW - .long 0x020003c0,0x60000050 # at location 0 is loaded. - .long 0x02000410,0x60000050 # Initial processing starts - .long 0x02000460,0x60000050 # at 0x200 = iplstart. 
- .long 0x020004b0,0x60000050 - .long 0x02000500,0x60000050 - .long 0x02000550,0x60000050 - .long 0x020005a0,0x60000050 - .long 0x020005f0,0x60000050 - .long 0x02000640,0x60000050 - .long 0x02000690,0x60000050 - .long 0x020006e0,0x20000050 - - .org __LC_RST_NEW_PSW # 0x1a0 - .quad 0,iplstart - .org __LC_PGM_NEW_PSW # 0x1d0 - .quad 0x0000000180000000,startup_pgm_check_handler - - .org 0x200 - +ipl_start: + mvi __LC_AR_MODE_ID,1 # set esame flag + slr %r0,%r0 # set cpuid to zero + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # switch to esame mode + sam64 # switch to 64 bit addressing mode + lgh %r1,__LC_SUBCHANNEL_ID # test if subchannel number + brctg %r1,.Lnoload # is valid + llgf %r1,__LC_SUBCHANNEL_ID # load ipl subchannel number + lghi %r2,IPL_BS # load start address + bras %r14,.Lloader # load rest of ipl image + larl %r12,parmarea # pointer to parameter area + stg %r1,IPL_DEVICE-PARMAREA(%r12) # save ipl device number +# +# load parameter file from ipl device +# +.Lagain1: + larl %r2,_end # ramdisk loc. is temp + bras %r14,.Lloader # load parameter file + ltgr %r2,%r2 # got anything ? + jz .Lnopf + lg %r3,MAX_COMMAND_LINE_SIZE-PARMAREA(%r12) + aghi %r3,-1 + clgr %r2,%r3 + jl .Lnotrunc + lgr %r2,%r3 +.Lnotrunc: + larl %r4,_end + larl %r13,.L_hdr + clc 0(3,%r4),0(%r13) # if it is HDRx + jz .Lagain1 # skip dataset header + larl %r13,.L_eof + clc 0(3,%r4),0(%r13) # if it is EOFx + jz .Lagain1 # skip data set trailer + lgr %r5,%r2 + la %r6,COMMAND_LINE-PARMAREA(%r12) + lgr %r7,%r2 + aghi %r7,1 + mvcl %r6,%r4 +.Lnopf: +# +# load ramdisk from ipl device +# +.Lagain2: + larl %r2,_end # addr of ramdisk + stg %r2,INITRD_START-PARMAREA(%r12) + bras %r14,.Lloader # load ramdisk + stg %r2,INITRD_SIZE-PARMAREA(%r12) # store size of rd + ltgr %r2,%r2 + jnz .Lrdcont + stg %r2,INITRD_START-PARMAREA(%r12) # no ramdisk found +.Lrdcont: + larl %r2,_end + larl %r13,.L_hdr # skip HDRx and EOFx + clc 0(3,%r2),0(%r13) + jz .Lagain2 + larl %r13,.L_eof + clc 0(3,%r2),0(%r13) + jz .Lagain2 +# +# reset files in VM reader +# + larl %r13,.Lcpuid + stidp 0(%r13) # store cpuid + tm 0(%r13),0xff # running VM ? 
+ jno .Lnoreset + larl %r2,.Lreset + lghi %r3,26 + diag %r2,%r3,8 + larl %r5,.Lirb + stsch 0(%r5) # check if irq is pending + tm 30(%r5),0x0f # by verifying if any of the + jnz .Lwaitforirq # activity or status control + tm 31(%r5),0xff # bits is set in the schib + jz .Lnoreset +.Lwaitforirq: + bras %r14,.Lirqwait # wait for IO interrupt + c %r1,__LC_SUBCHANNEL_ID # compare subchannel number + jne .Lwaitforirq + larl %r5,.Lirb + tsch 0(%r5) +.Lnoreset: + j .Lnoload +# +# everything loaded, go for it +# +.Lnoload: + jg startup # # subroutine to wait for end I/O # .Lirqwait: - mvc __LC_IO_NEW_PSW(16),.Lnewpsw # set up IO interrupt psw - lpsw .Lwaitpsw + larl %r13,.Lnewpswmask # set up IO interrupt psw + mvc __LC_IO_NEW_PSW(8),0(%r13) + stg %r14,__LC_IO_NEW_PSW+8 + larl %r13,.Lwaitpsw + lpswe 0(%r13) .Lioint: - br %r14 - .align 8 -.Lnewpsw: - .quad 0x0000000080000000,.Lioint -.Lwaitpsw: - .long 0x020a0000,0x80000000+.Lioint - # # subroutine for loading cards from the reader # .Lloader: - la %r4,0(%r14) - la %r3,.Lorb # r2 = address of orb into r2 - la %r5,.Lirb # r4 = address of irb - la %r6,.Lccws - la %r7,20 + lgr %r4,%r14 + larl %r3,.Lorb # r2 = address of orb into r2 + larl %r5,.Lirb # r4 = address of irb + larl %r6,.Lccws + lghi %r7,20 .Linit: st %r2,4(%r6) # initialize CCW data addresses la %r2,0x50(%r2) la %r6,8(%r6) - bct 7,.Linit - - lctl %c6,%c6,.Lcr6 # set IO subclass mask - slr %r2,%r2 + brctg %r7,.Linit + larl %r13,.Lcr6 + lctlg %c6,%c6,0(%r13) + xgr %r2,%r2 .Lldlp: ssch 0(%r3) # load chunk of 1600 bytes - bnz .Llderr + jnz .Llderr .Lwait4irq: - bas %r14,.Lirqwait + bras %r14,.Lirqwait c %r1,__LC_SUBCHANNEL_ID # compare subchannel number - bne .Lwait4irq + jne .Lwait4irq tsch 0(%r5) - - slr %r0,%r0 + xgr %r0,%r0 ic %r0,8(%r5) # get device status - chi %r0,8 # channel end ? - be .Lcont - chi %r0,12 # channel end + device end ? - be .Lcont - - l %r0,4(%r5) - s %r0,8(%r3) # r0/8 = number of ccws executed - mhi %r0,10 # *10 = number of bytes in ccws - lh %r3,10(%r5) # get residual count - sr %r0,%r3 # #ccws*80-residual=#bytes read - ar %r2,%r0 - + cghi %r0,8 # channel end ? + je .Lcont + cghi %r0,12 # channel end + device end ? 
+ je .Lcont + llgf %r0,4(%r5) + sgf %r0,8(%r3) # r0/8 = number of ccws executed + mghi %r0,10 # *10 = number of bytes in ccws + llgh %r3,10(%r5) # get residual count + sgr %r0,%r3 # #ccws*80-residual=#bytes read + agr %r2,%r0 br %r4 # r2 contains the total size - .Lcont: - ahi %r2,0x640 # add 0x640 to total size - la %r6,.Lccws - la %r7,20 + aghi %r2,0x640 # add 0x640 to total size + larl %r6,.Lccws + lghi %r7,20 .Lincr: l %r0,4(%r6) # update CCW data addresses - ahi %r0,0x640 + aghi %r0,0x640 st %r0,4(%r6) - ahi %r6,8 - bct 7,.Lincr - - b .Lldlp + aghi %r6,8 + brctg %r7,.Lincr + j .Lldlp .Llderr: - lpsw .Lcrash + larl %r13,.Lcrash + lpsw 0(%r13) - .align 8 + .balign 8 +.Lwaitpsw: + .quad 0x0202000180000000,.Lioint +.Lnewpswmask: + .quad 0x0000000180000000 + .balign 8 .Lorb: .long 0x00000000,0x0080ff00,.Lccws .Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -.Lcr6: .long 0xff000000 -.Lloadp:.long 0,0 - .align 8 + .balign 8 +.Lcr6: .quad 0x00000000ff000000 + .balign 8 .Lcrash:.long 0x000a0000,0x00000000 - - .align 8 + .balign 8 .Lccws: .rept 19 .long 0x02600050,0x00000000 .endr .long 0x02200050,0x00000000 - -iplstart: - mvi __LC_AR_MODE_ID,1 # set esame flag - slr %r0,%r0 # set cpuid to zero - lhi %r1,2 # mode 2 = esame (dump) - sigp %r1,%r0,0x12 # switch to esame mode - bras %r13,0f - .fill 16,4,0x0 -0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs - sam31 # switch to 31 bit addressing mode - lh %r1,__LC_SUBCHANNEL_ID # test if subchannel number - bct %r1,.Lnoload # is valid - l %r1,__LC_SUBCHANNEL_ID # load ipl subchannel number - la %r2,IPL_BS # load start address - bas %r14,.Lloader # load rest of ipl image - l %r12,.Lparm # pointer to parameter area - st %r1,IPL_DEVICE+ARCH_OFFSET-PARMAREA(%r12) # save ipl device number - -# -# load parameter file from ipl device -# -.Lagain1: - l %r2,.Linitrd # ramdisk loc. is temp - bas %r14,.Lloader # load parameter file - ltr %r2,%r2 # got anything ? - bz .Lnopf - chi %r2,895 - bnh .Lnotrunc - la %r2,895 -.Lnotrunc: - l %r4,.Linitrd - clc 0(3,%r4),.L_hdr # if it is HDRx - bz .Lagain1 # skip dataset header - clc 0(3,%r4),.L_eof # if it is EOFx - bz .Lagain1 # skip dateset trailer - la %r5,0(%r4,%r2) - lr %r3,%r2 - la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line - mvc 0(256,%r3),0(%r4) - mvc 256(256,%r3),256(%r4) - mvc 512(256,%r3),512(%r4) - mvc 768(122,%r3),768(%r4) - slr %r0,%r0 - b .Lcntlp -.Ldelspc: - ic %r0,0(%r2,%r3) - chi %r0,0x20 # is it a space ? - be .Lcntlp - ahi %r2,1 - b .Leolp -.Lcntlp: - brct %r2,.Ldelspc -.Leolp: - slr %r0,%r0 - stc %r0,0(%r2,%r3) # terminate buffer -.Lnopf: - -# -# load ramdisk from ipl device -# -.Lagain2: - l %r2,.Linitrd # addr of ramdisk - st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) - bas %r14,.Lloader # load ramdisk - st %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of rd - ltr %r2,%r2 - bnz .Lrdcont - st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found -.Lrdcont: - l %r2,.Linitrd - - clc 0(3,%r2),.L_hdr # skip HDRx and EOFx - bz .Lagain2 - clc 0(3,%r2),.L_eof - bz .Lagain2 - -# -# reset files in VM reader -# - stidp .Lcpuid # store cpuid - tm .Lcpuid,0xff # running VM ? 
- bno .Lnoreset - la %r2,.Lreset - lhi %r3,26 - diag %r2,%r3,8 - la %r5,.Lirb - stsch 0(%r5) # check if irq is pending - tm 30(%r5),0x0f # by verifying if any of the - bnz .Lwaitforirq # activity or status control - tm 31(%r5),0xff # bits is set in the schib - bz .Lnoreset -.Lwaitforirq: - bas %r14,.Lirqwait # wait for IO interrupt - c %r1,__LC_SUBCHANNEL_ID # compare subchannel number - bne .Lwaitforirq - la %r5,.Lirb - tsch 0(%r5) -.Lnoreset: - b .Lnoload - -# -# everything loaded, go for it -# -.Lnoload: - l %r1,.Lstartup - br %r1 - -.Linitrd:.long _end # default address of initrd -.Lparm: .long PARMAREA -.Lstartup: .long startup .Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40 .byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6 .byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold" .L_eof: .long 0xc5d6c600 /* C'EOF' */ .L_hdr: .long 0xc8c4d900 /* C'HDR' */ - .align 8 + .balign 8 .Lcpuid:.fill 8,1,0 # -# startup-code at 0x10000, running in absolute addressing mode +# normal startup-code, running in absolute addressing mode # this is called either by the ipl loader or directly by PSW restart # or linload or SALIPL # - .org 0x10000 -ENTRY(startup) - j .Lep_startup_normal - .org EP_OFFSET + .org STARTUP_NORMAL_OFFSET - IPL_START +SYM_CODE_START(startup) + j startup_normal + .org EP_OFFSET - IPL_START # # This is a list of s390 kernel entry points. At address 0x1000f the number of # valid entry points is stored. @@ -287,12 +228,12 @@ ENTRY(startup) .ascii EP_STRING .byte 0x00,0x01 # -# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode +# kdump startup-code, running in 64 bit absolute addressing mode # - .org 0x10010 -ENTRY(startup_kdump) - j .Lep_startup_kdump -.Lep_startup_normal: + .org STARTUP_KDUMP_OFFSET - IPL_START + j startup_kdump +SYM_CODE_END(startup) +SYM_CODE_START_LOCAL(startup_normal) mvi __LC_AR_MODE_ID,1 # set esame flag slr %r0,%r0 # set cpuid to zero lhi %r1,2 # mode 2 = esame (dump) @@ -301,103 +242,84 @@ ENTRY(startup_kdump) .fill 16,4,0x0 0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs sam64 # switch to 64 bit addressing mode - basr %r13,0 # get base -.LPG0: + larl %r13,.Lext_new_psw + mvc __LC_EXT_NEW_PSW(16),0(%r13) + larl %r13,.Lpgm_new_psw + mvc __LC_PGM_NEW_PSW(16),0(%r13) + larl %r13,.Lio_new_psw + mvc __LC_IO_NEW_PSW(16),0(%r13) xc 0x200(256),0x200 # partially clear lowcore xc 0x300(256),0x300 xc 0xe00(256),0xe00 xc 0xf00(256),0xf00 - lctlg %c0,%c15,.Lctl-.LPG0(%r13) # load control registers - stcke __LC_BOOT_CLOCK - mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 - spt 6f-.LPG0(%r13) - mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) - l %r15,.Lstack-.LPG0(%r13) + larl %r13,.Lctl + lctlg %c0,%c15,0(%r13) # load control registers + larl %r13,tod_clock_base + stcke 0(%r13) + mvc __LC_LAST_UPDATE_CLOCK(8),1(%r13) + larl %r13,6f + spt 0(%r13) + mvc __LC_LAST_UPDATE_TIMER(8),0(%r13) + larl %r15,_stack_end-STACK_FRAME_OVERHEAD + brasl %r14,sclp_early_setup_buffer brasl %r14,verify_facilities brasl %r14,startup_kernel +SYM_CODE_END(startup_normal) -.Lstack: - .long 0x8000 + (1<<(PAGE_SHIFT+BOOT_STACK_ORDER)) - STACK_FRAME_OVERHEAD - .align 8 + .balign 8 6: .long 0x7fffffff,0xffffffff - +.Lext_new_psw: + .quad 0x0002000180000000,0x1b0 # disabled wait +.Lpgm_new_psw: + .quad 0x0000000180000000,startup_pgm_check_handler +.Lio_new_psw: + .quad 0x0002000180000000,0x1f0 # disabled wait .Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space .quad 0 # cr1: primary space segment table - .quad .Lduct # cr2: 
dispatchable unit control table + .quad 0 # cr2: dispatchable unit control table .quad 0 # cr3: instruction authorization .quad 0xffff # cr4: instruction authorization - .quad .Lduct # cr5: primary-aste origin + .quad 0 # cr5: primary-aste origin .quad 0 # cr6: I/O interrupts .quad 0 # cr7: secondary space segment table - .quad 0 # cr8: access registers translation + .quad 0x0000000000008000 # cr8: access registers translation .quad 0 # cr9: tracing off .quad 0 # cr10: tracing off .quad 0 # cr11: tracing off .quad 0 # cr12: tracing off .quad 0 # cr13: home space segment table .quad 0xc0000000 # cr14: machine check handling off - .quad .Llinkage_stack # cr15: linkage stack operations - - .section .dma.data,"aw",@progbits -.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0 - .long 0,0,0,0,0,0,0,0 -.Llinkage_stack: - .long 0,0,0x89000000,0,0,0,0x8a000000,0 - .align 64 -.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0 - .align 128 -.Lduald:.rept 8 - .long 0x80000000,0,0,0 # invalid access-list entries - .endr - .previous + .quad 0 # cr15: linkage stack operations #include "head_kdump.S" -# -# This program check is active immediately after kernel start -# and until early_pgm_check_handler is set in kernel/early.c -# It simply saves general/control registers and psw in -# the save area and does disabled wait with a faulty address. -# -ENTRY(startup_pgm_check_handler) - stmg %r0,%r15,__LC_SAVE_AREA_SYNC - la %r1,4095 - stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r1) - mvc __LC_GPREGS_SAVE_AREA-4095(128,%r1),__LC_SAVE_AREA_SYNC - mvc __LC_PSW_SAVE_AREA-4095(16,%r1),__LC_PGM_OLD_PSW +SYM_CODE_START_LOCAL(startup_pgm_check_handler) + stmg %r8,%r15,__LC_SAVE_AREA + la %r8,4095 + stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8) + stmg %r0,%r7,__LC_GPREGS_SAVE_AREA-4095(%r8) + mvc __LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA + mvc __LC_PSW_SAVE_AREA-4095(16,%r8),__LC_PGM_OLD_PSW mvc __LC_RETURN_PSW(16),__LC_PGM_OLD_PSW ni __LC_RETURN_PSW,0xfc # remove IO and EX bits ni __LC_RETURN_PSW+1,0xfb # remove MCHK bit oi __LC_RETURN_PSW+1,0x2 # set wait state bit - larl %r2,.Lold_psw_disabled_wait - stg %r2,__LC_PGM_NEW_PSW+8 - l %r15,.Ldump_info_stack-.Lold_psw_disabled_wait(%r2) - brasl %r14,print_pgm_check_info + larl %r9,.Lold_psw_disabled_wait + stg %r9,__LC_PGM_NEW_PSW+8 + larl %r15,_dump_info_stack_end-(STACK_FRAME_OVERHEAD+__PT_SIZE) + la %r2,STACK_FRAME_OVERHEAD(%r15) + mvc __PT_PSW(16,%r2),__LC_PSW_SAVE_AREA-4095(%r8) + mvc __PT_R0(128,%r2),__LC_GPREGS_SAVE_AREA-4095(%r8) + mvc __PT_LAST_BREAK(8,%r2),__LC_PGM_LAST_BREAK + mvc __PT_INT_CODE(4,%r2),__LC_PGM_INT_CODE + brasl %r14,do_pgm_check + larl %r9,startup_pgm_check_handler + stg %r9,__LC_PGM_NEW_PSW+8 + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + lpswe __LC_RETURN_PSW .Lold_psw_disabled_wait: - la %r1,4095 - lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) + la %r8,4095 + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8) lpswe __LC_RETURN_PSW # disabled wait -.Ldump_info_stack: - .long 0x5000 + PAGE_SIZE - STACK_FRAME_OVERHEAD -ENDPROC(startup_pgm_check_handler) - -# -# params at 10400 (setup.h) -# Must be keept in sync with struct parmarea in setup.h -# - .org PARMAREA - .quad 0 # IPL_DEVICE - .quad 0 # INITRD_START - .quad 0 # INITRD_SIZE - .quad 0 # OLDMEM_BASE - .quad 0 # OLDMEM_SIZE - .quad kernel_version # points to kernel version string - - .org COMMAND_LINE - .byte "root=/dev/ram0 ro" - .byte 0 - - .org EARLY_SCCB_OFFSET - .fill 4096 - - .org HEAD_END 
+SYM_CODE_END(startup_pgm_check_handler) diff --git a/arch/s390/boot/head_kdump.S b/arch/s390/boot/head_kdump.S index 174d6959bf5b..f7107c76258c 100644 --- a/arch/s390/boot/head_kdump.S +++ b/arch/s390/boot/head_kdump.S @@ -19,8 +19,7 @@ # Note: This code has to be position independent # -.align 2 -.Lep_startup_kdump: +SYM_CODE_START_LOCAL(startup_kdump) lhi %r1,2 # mode 2 = esame (dump) sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode sam64 # Switch to 64 bit addressing @@ -83,19 +82,20 @@ # # Startup of kdump (relocated new kernel) # -.align 2 + .balign 2 startup_kdump_relocated: basr %r13,0 0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel... -.align 8 +SYM_CODE_END(startup_kdump) + .balign 8 .Lrestart_psw: .quad 0x0000000080000000,0x0000000000000000 + startup #else -.align 2 -.Lep_startup_kdump: +SYM_CODE_START_LOCAL(startup_kdump) larl %r13,startup_kdump_crash lpswe 0(%r13) -.align 8 +SYM_CODE_END(startup_kdump) + .balign 8 startup_kdump_crash: .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash #endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index bed227f267ae..fa41486258ee 100644..100755 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -14,22 +14,13 @@ # $2 - kernel image file # $3 - kernel map file # $4 - default install path (blank if root directory) -# - -# User may have a custom install script - -if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi -if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi - -# Default install - same as make zlilo -if [ -f $4/vmlinuz ]; then - mv $4/vmlinuz $4/vmlinuz.old -fi +set -e -if [ -f $4/System.map ]; then - mv $4/System.map $4/System.old -fi +echo "Warning: '${INSTALLKERNEL}' command not available - additional " \ + "bootloader config required" >&2 +if [ -f "$4/vmlinuz-$1" ]; then mv -- "$4/vmlinuz-$1" "$4/vmlinuz-$1.old"; fi +if [ -f "$4/System.map-$1" ]; then mv -- "$4/System.map-$1" "$4/System.map-$1.old"; fi -cat $2 > $4/vmlinuz -cp $3 $4/System.map +cat -- "$2" > "$4/vmlinuz-$1" +cp -- "$3" "$4/System.map-$1" diff --git a/arch/s390/boot/ipl_data.c b/arch/s390/boot/ipl_data.c new file mode 100644 index 000000000000..0846e2b249c6 --- /dev/null +++ b/arch/s390/boot/ipl_data.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/compat.h> +#include <linux/ptrace.h> +#include <asm/cio.h> +#include <asm/asm-offsets.h> +#include "boot.h" + +#define CCW0(cmd, addr, cnt, flg) \ + { .cmd_code = cmd, .cda = addr, .count = cnt, .flags = flg, } + +#define PSW_MASK_DISABLED (PSW_MASK_WAIT | PSW_MASK_EA | PSW_MASK_BA) + +struct ipl_lowcore { + psw_t32 ipl_psw; /* 0x0000 */ + struct ccw0 ccwpgm[2]; /* 0x0008 */ + u8 fill[56]; /* 0x0018 */ + struct ccw0 ccwpgmcc[20]; /* 0x0050 */ + u8 pad_0xf0[0x01a0-0x00f0]; /* 0x00f0 */ + psw_t restart_psw; /* 0x01a0 */ + psw_t external_new_psw; /* 0x01b0 */ + psw_t svc_new_psw; /* 0x01c0 */ + psw_t program_new_psw; /* 0x01d0 */ + psw_t mcck_new_psw; /* 0x01e0 */ + psw_t io_new_psw; /* 0x01f0 */ +}; + +/* + * Initial lowcore for IPL: the first 24 bytes are loaded by IPL to + * addresses 0-23 (a PSW and two CCWs). Bytes 24-79 are discarded. + * The next 160 bytes are loaded to addresses 0x18-0xb7. They form + * the continuation of the CCW program started by IPL and load the + * range 0x0f0-0x730 from the image to the range 0x0f0-0x730 in + * memory. At the end of the channel program the PSW at location 0 is + * loaded. 
+ * Initial processing starts at 0x200 = iplstart. + * + * The restart psw points to iplstart which allows to load a kernel + * image into memory and starting it by a psw restart on any cpu. All + * other default psw new locations contain a disabled wait psw where + * the address indicates which psw was loaded. + * + * Note that the 'file' utility can detect s390 kernel images. For + * that to succeed the two initial CCWs, and the 0x40 fill bytes must + * be present. + */ +static struct ipl_lowcore ipl_lowcore __used __section(".ipldata") = { + .ipl_psw = { .mask = PSW32_MASK_BASE, .addr = PSW32_ADDR_AMODE | IPL_START }, + .ccwpgm = { + [ 0] = CCW0(CCW_CMD_READ_IPL, 0x018, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 1] = CCW0(CCW_CMD_READ_IPL, 0x068, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + }, + .fill = { + [ 0 ... 55] = 0x40, + }, + .ccwpgmcc = { + [ 0] = CCW0(CCW_CMD_READ_IPL, 0x0f0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 1] = CCW0(CCW_CMD_READ_IPL, 0x140, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 2] = CCW0(CCW_CMD_READ_IPL, 0x190, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 3] = CCW0(CCW_CMD_READ_IPL, 0x1e0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 4] = CCW0(CCW_CMD_READ_IPL, 0x230, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 5] = CCW0(CCW_CMD_READ_IPL, 0x280, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 6] = CCW0(CCW_CMD_READ_IPL, 0x2d0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 7] = CCW0(CCW_CMD_READ_IPL, 0x320, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 8] = CCW0(CCW_CMD_READ_IPL, 0x370, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 9] = CCW0(CCW_CMD_READ_IPL, 0x3c0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [10] = CCW0(CCW_CMD_READ_IPL, 0x410, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [11] = CCW0(CCW_CMD_READ_IPL, 0x460, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [12] = CCW0(CCW_CMD_READ_IPL, 0x4b0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [13] = CCW0(CCW_CMD_READ_IPL, 0x500, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [14] = CCW0(CCW_CMD_READ_IPL, 0x550, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [15] = CCW0(CCW_CMD_READ_IPL, 0x5a0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [16] = CCW0(CCW_CMD_READ_IPL, 0x5f0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [17] = CCW0(CCW_CMD_READ_IPL, 0x640, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [18] = CCW0(CCW_CMD_READ_IPL, 0x690, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [19] = CCW0(CCW_CMD_READ_IPL, 0x6e0, 0x50, CCW_FLAG_SLI), + }, + .restart_psw = { .mask = 0, .addr = IPL_START, }, + .external_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_EXT_NEW_PSW, }, + .svc_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_SVC_NEW_PSW, }, + .program_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_PGM_NEW_PSW, }, + .mcck_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_MCK_NEW_PSW, }, + .io_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_IO_NEW_PSW, }, +}; diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 24ef67eb1cef..d04e9b89d14a 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -2,48 +2,49 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/ctype.h> +#include <linux/pgtable.h> +#include <asm/abs_lowcore.h> +#include <asm/page-states.h> +#include <asm/machine.h> #include <asm/ebcdic.h> #include <asm/sclp.h> #include <asm/sections.h> #include <asm/boot_data.h> #include <asm/facility.h> -#include <asm/pgtable.h> +#include <asm/setup.h> #include <asm/uv.h> #include "boot.h" +struct parmarea parmarea __section(".parmarea") = { + .kernel_version = (unsigned long)kernel_version, + .max_command_line_size = COMMAND_LINE_SIZE, + .command_line = "root=/dev/ram0 ro", +}; + char 
__bootdata(early_command_line)[COMMAND_LINE_SIZE]; + +unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; struct ipl_parameter_block __bootdata_preserved(ipl_block); int __bootdata_preserved(ipl_block_valid); +int __bootdata_preserved(__kaslr_enabled); +int __bootdata_preserved(cmma_flag) = 1; -unsigned long __bootdata(vmalloc_size) = VMALLOC_DEFAULT_SIZE; -unsigned long __bootdata(memory_end); -int __bootdata(memory_end_set); -int __bootdata(noexec_disabled); - -int kaslr_enabled __section(.data); +unsigned long vmalloc_size = VMALLOC_DEFAULT_SIZE; +unsigned long memory_limit; +int vmalloc_size_set; static inline int __diag308(unsigned long subcode, void *addr) { - register unsigned long _addr asm("0") = (unsigned long)addr; - register unsigned long _rc asm("1") = 0; - unsigned long reg1, reg2; - psw_t old = S390_lowcore.program_new_psw; - - asm volatile( - " epsw %0,%1\n" - " st %0,%[psw_pgm]\n" - " st %1,%[psw_pgm]+4\n" - " larl %0,1f\n" - " stg %0,%[psw_pgm]+8\n" - " diag %[addr],%[subcode],0x308\n" - "1: nopr %%r7\n" - : "=&d" (reg1), "=&a" (reg2), - [psw_pgm] "=Q" (S390_lowcore.program_new_psw), - [addr] "+d" (_addr), "+d" (_rc) + union register_pair r1 = { .even = (unsigned long)addr, .odd = 0 }; + + asm_inline volatile( + " diag %[r1],%[subcode],0x308\n" + "0:\n" + EX_TABLE(0b, 0b) + : [r1] "+d" (r1.pair) : [subcode] "d" (subcode) : "cc", "memory"); - S390_lowcore.program_new_psw = old; - return _rc; + return r1.odd; } void store_ipl_parmblock(void) @@ -56,6 +57,20 @@ void store_ipl_parmblock(void) ipl_block_valid = 1; } +bool is_ipl_block_dump(void) +{ + if (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && + ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) + return true; + if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME && + ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) + return true; + if (ipl_block.pb0_hdr.pbt == IPL_PBT_ECKD && + ipl_block.eckd.opt == IPL_PB0_ECKD_OPT_DUMP) + return true; + return false; +} + static size_t scpdata_length(const u8 *buf, size_t count) { while (count) { @@ -69,30 +84,49 @@ static size_t scpdata_length(const u8 *buf, size_t count) static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size, const struct ipl_parameter_block *ipb) { - size_t count; - size_t i; + const __u8 *scp_data; + __u32 scp_data_len; int has_lowercase; + size_t count = 0; + size_t i; - count = min(size - 1, scpdata_length(ipb->fcp.scp_data, - ipb->fcp.scp_data_len)); + switch (ipb->pb0_hdr.pbt) { + case IPL_PBT_FCP: + scp_data_len = ipb->fcp.scp_data_len; + scp_data = ipb->fcp.scp_data; + break; + case IPL_PBT_NVME: + scp_data_len = ipb->nvme.scp_data_len; + scp_data = ipb->nvme.scp_data; + break; + case IPL_PBT_ECKD: + scp_data_len = ipb->eckd.scp_data_len; + scp_data = ipb->eckd.scp_data; + break; + + default: + goto out; + } + + count = min(size - 1, scpdata_length(scp_data, scp_data_len)); if (!count) goto out; has_lowercase = 0; for (i = 0; i < count; i++) { - if (!isascii(ipb->fcp.scp_data[i])) { + if (!isascii(scp_data[i])) { count = 0; goto out; } - if (!has_lowercase && islower(ipb->fcp.scp_data[i])) + if (!has_lowercase && islower(scp_data[i])) has_lowercase = 1; } if (has_lowercase) - memcpy(dest, ipb->fcp.scp_data, count); + memcpy(dest, scp_data, count); else for (i = 0; i < count; i++) - dest[i] = tolower(ipb->fcp.scp_data[i]); + dest[i] = tolower(scp_data[i]); out: dest[count] = '\0'; return count; @@ -114,6 +148,8 @@ static void append_ipl_block_parm(void) parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); break; case IPL_PBT_FCP: + case IPL_PBT_NVME: + case 
IPL_PBT_ECKD: rc = ipl_block_get_ascii_scpdata( parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); break; @@ -138,12 +174,12 @@ static inline int has_ebcdic_char(const char *str) void setup_boot_command_line(void) { - COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0; + parmarea.command_line[COMMAND_LINE_SIZE - 1] = 0; /* convert arch command line to ascii if necessary */ - if (has_ebcdic_char(COMMAND_LINE)) - EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); + if (has_ebcdic_char(parmarea.command_line)) + EBCASC(parmarea.command_line, COMMAND_LINE_SIZE); /* copy arch command line */ - strcpy(early_command_line, strim(COMMAND_LINE)); + strcpy(early_command_line, strim(parmarea.command_line)); /* append IPL PARM data to the boot command line */ if (!is_prot_virt_guest() && ipl_block_valid) @@ -153,9 +189,9 @@ void setup_boot_command_line(void) static void modify_facility(unsigned long nr, bool clear) { if (clear) - __clear_facility(nr, S390_lowcore.stfle_fac_list); + __clear_facility(nr, stfle_fac_list); else - __set_facility(nr, S390_lowcore.stfle_fac_list); + __set_facility(nr, stfle_fac_list); } static void check_cleared_facilities(void) @@ -164,8 +200,8 @@ static void check_cleared_facilities(void) int i; for (i = 0; i < ARRAY_SIZE(als); i++) { - if ((S390_lowcore.stfle_fac_list[i] & als[i]) != als[i]) { - sclp_early_printk("Warning: The Linux kernel requires facilities cleared via command line option\n"); + if ((stfle_fac_list[i] & als[i]) != als[i]) { + boot_emerg("The Linux kernel requires facilities cleared via command line option\n"); print_missing_facilities(); break; } @@ -208,7 +244,7 @@ static void modify_fac_list(char *str) check_cleared_facilities(); } -static char command_line_buf[COMMAND_LINE_SIZE] __section(.data); +static char command_line_buf[COMMAND_LINE_SIZE]; void parse_boot_command_line(void) { char *param, *val; @@ -216,44 +252,73 @@ void parse_boot_command_line(void) char *args; int rc; - kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE); + __kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE); args = strcpy(command_line_buf, early_command_line); while (*args) { args = next_arg(args, ¶m, &val); - if (!strcmp(param, "mem") && val) { - memory_end = round_down(memparse(val, NULL), PAGE_SIZE); - memory_end_set = 1; - } + if (!strcmp(param, "mem") && val) + memory_limit = round_down(memparse(val, NULL), PAGE_SIZE); - if (!strcmp(param, "vmalloc") && val) - vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE); + if (!strcmp(param, "vmalloc") && val) { + vmalloc_size = round_up(memparse(val, NULL), _SEGMENT_SIZE); + vmalloc_size_set = 1; + } - if (!strcmp(param, "noexec")) { - rc = kstrtobool(val, &enabled); - if (!rc && !enabled) - noexec_disabled = 1; + if (!strcmp(param, "dfltcc") && val) { + if (!strcmp(val, "off")) + zlib_dfltcc_support = ZLIB_DFLTCC_DISABLED; + else if (!strcmp(val, "on")) + zlib_dfltcc_support = ZLIB_DFLTCC_FULL; + else if (!strcmp(val, "def_only")) + zlib_dfltcc_support = ZLIB_DFLTCC_DEFLATE_ONLY; + else if (!strcmp(val, "inf_only")) + zlib_dfltcc_support = ZLIB_DFLTCC_INFLATE_ONLY; + else if (!strcmp(val, "always")) + zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG; } if (!strcmp(param, "facilities") && val) modify_fac_list(val); + if (!strcmp(param, "debug-alternative")) + alt_debug_setup(val); + if (!strcmp(param, "nokaslr")) - kaslr_enabled = 0; - } -} + __kaslr_enabled = 0; -void setup_memory_end(void) -{ -#ifdef CONFIG_CRASH_DUMP - if (OLDMEM_BASE) { - kaslr_enabled = 0; - } else if (ipl_block_valid && - ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && - 
ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) { - kaslr_enabled = 0; - if (!sclp_early_get_hsa_size(&memory_end) && memory_end) - memory_end_set = 1; - } + if (!strcmp(param, "cmma")) { + rc = kstrtobool(val, &enabled); + if (!rc && !enabled) + cmma_flag = 0; + } + +#if IS_ENABLED(CONFIG_KVM) + if (!strcmp(param, "prot_virt")) { + rc = kstrtobool(val, &enabled); + if (!rc && enabled) + prot_virt_host = 1; + } #endif + if (!strcmp(param, "relocate_lowcore") && test_facility(193)) + set_machine_feature(MFEATURE_LOWCORE); + if (!strcmp(param, "earlyprintk")) + boot_earlyprintk = true; + if (!strcmp(param, "debug")) + boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG; + if (!strcmp(param, "bootdebug")) { + bootdebug = true; + if (val) + strncpy(bootdebug_filter, val, sizeof(bootdebug_filter) - 1); + } + if (!strcmp(param, "quiet")) + boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET; + if (!strcmp(param, "ignore_loglevel")) + boot_ignore_loglevel = true; + if (!strcmp(param, "loglevel")) { + boot_console_loglevel = simple_strtoull(val, NULL, 10); + if (boot_console_loglevel < CONSOLE_LOGLEVEL_MIN) + boot_console_loglevel = CONSOLE_LOGLEVEL_MIN; + } + } } diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index 0b4965573656..f73cd757a5f7 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -5,6 +5,7 @@ #include <asm/sclp.h> #include <asm/sections.h> #include <asm/boot_data.h> +#include <asm/physmem_info.h> #include <uapi/asm/ipl.h> #include "boot.h" @@ -16,24 +17,19 @@ unsigned long __bootdata_preserved(ipl_cert_list_size); unsigned long __bootdata(early_ipl_comp_list_addr); unsigned long __bootdata(early_ipl_comp_list_size); +static struct ipl_rb_certificates *certs; +static struct ipl_rb_components *comps; +static bool ipl_report_needs_saving; + #define for_each_rb_entry(entry, rb) \ for (entry = rb->entries; \ (void *) entry + sizeof(*entry) <= (void *) rb + rb->len; \ entry++) -static inline bool intersects(unsigned long addr0, unsigned long size0, - unsigned long addr1, unsigned long size1) -{ - return addr0 + size0 > addr1 && addr1 + size1 > addr0; -} - -static unsigned long find_bootdata_space(struct ipl_rb_components *comps, - struct ipl_rb_certificates *certs, - unsigned long safe_addr) +static unsigned long get_cert_comp_list_size(void) { struct ipl_rb_certificate_entry *cert; struct ipl_rb_component_entry *comp; - size_t size; /* * Find the length for the IPL report boot data @@ -44,36 +40,27 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps, ipl_cert_list_size = 0; for_each_rb_entry(cert, certs) ipl_cert_list_size += sizeof(unsigned int) + cert->len; - size = ipl_cert_list_size + early_ipl_comp_list_size; + return ipl_cert_list_size + early_ipl_comp_list_size; +} - /* - * Start from safe_addr to find a free memory area large - * enough for the IPL report boot data. This area is used - * for ipl_cert_list_addr/ipl_cert_list_size and - * early_ipl_comp_list_addr/early_ipl_comp_list_size. It must - * not overlap with any component or any certificate. 
- */ -repeat: - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE && - intersects(INITRD_START, INITRD_SIZE, safe_addr, size)) - safe_addr = INITRD_START + INITRD_SIZE; - for_each_rb_entry(comp, comps) - if (intersects(safe_addr, size, comp->addr, comp->len)) { - safe_addr = comp->addr + comp->len; - goto repeat; - } - for_each_rb_entry(cert, certs) - if (intersects(safe_addr, size, cert->addr, cert->len)) { - safe_addr = cert->addr + cert->len; - goto repeat; - } - early_ipl_comp_list_addr = safe_addr; - ipl_cert_list_addr = safe_addr + early_ipl_comp_list_size; +bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start) +{ + struct ipl_rb_certificate_entry *cert; - return safe_addr + size; + if (!ipl_report_needs_saving) + return false; + + for_each_rb_entry(cert, certs) { + if (intersects(addr, size, cert->addr, cert->len)) { + *intersection_start = cert->addr; + return true; + } + } + return false; } -static void copy_components_bootdata(struct ipl_rb_components *comps) +static void copy_components_bootdata(void) { struct ipl_rb_component_entry *comp, *ptr; @@ -82,7 +69,7 @@ static void copy_components_bootdata(struct ipl_rb_components *comps) memcpy(ptr++, comp, sizeof(*ptr)); } -static void copy_certificates_bootdata(struct ipl_rb_certificates *certs) +static void copy_certificates_bootdata(void) { struct ipl_rb_certificate_entry *cert; void *ptr; @@ -96,10 +83,8 @@ static void copy_certificates_bootdata(struct ipl_rb_certificates *certs) } } -unsigned long read_ipl_report(unsigned long safe_addr) +int read_ipl_report(void) { - struct ipl_rb_certificates *certs; - struct ipl_rb_components *comps; struct ipl_pl_hdr *pl_hdr; struct ipl_rl_hdr *rl_hdr; struct ipl_rb_hdr *rb_hdr; @@ -112,7 +97,7 @@ unsigned long read_ipl_report(unsigned long safe_addr) */ if (!ipl_block_valid || !(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR)) - return safe_addr; + return -1; ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL); /* * There is an IPL report, to find it load the pointer to the @@ -120,7 +105,7 @@ unsigned long read_ipl_report(unsigned long safe_addr) * the IPL parameter list, then align the address to a double * word boundary. */ - tmp = (unsigned long) S390_lowcore.ipl_parmblock_ptr; + tmp = (unsigned long)get_lowcore()->ipl_parmblock_ptr; pl_hdr = (struct ipl_pl_hdr *) tmp; tmp = (tmp + pl_hdr->len + 7) & -8UL; rl_hdr = (struct ipl_rl_hdr *) tmp; @@ -150,16 +135,30 @@ unsigned long read_ipl_report(unsigned long safe_addr) * With either the component list or the certificate list * missing the kernel will stay ignorant of secure IPL. */ - if (!comps || !certs) - return safe_addr; + if (!comps || !certs) { + certs = NULL; + return -1; + } - /* - * Copy component and certificate list to a safe area - * where the decompressed kernel can find them. 
- */ - safe_addr = find_bootdata_space(comps, certs, safe_addr); - copy_components_bootdata(comps); - copy_certificates_bootdata(certs); + ipl_report_needs_saving = true; + physmem_reserve(RR_IPLREPORT, (unsigned long)pl_hdr, + (unsigned long)rl_end - (unsigned long)pl_hdr); + return 0; +} + +void save_ipl_cert_comp_list(void) +{ + unsigned long size; + + if (!ipl_report_needs_saving) + return; + + size = get_cert_comp_list_size(); + early_ipl_comp_list_addr = physmem_alloc_or_die(RR_CERT_COMP_LIST, size, sizeof(int)); + ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size; - return safe_addr; + copy_components_bootdata(); + copy_certificates_bootdata(); + physmem_free(RR_IPLREPORT); + ipl_report_needs_saving = false; } diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 5d12352545c5..941f4c9e27cc 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -2,12 +2,13 @@ /* * Copyright IBM Corp. 2019 */ -#include <asm/mem_detect.h> -#include <asm/pgtable.h> +#include <linux/pgtable.h> +#include <asm/physmem_info.h> #include <asm/cpacf.h> #include <asm/timex.h> #include <asm/sclp.h> -#include "compressed/decompressor.h" +#include <asm/kasan.h> +#include "decompressor.h" #include "boot.h" #define PRNG_MODE_TDES 1 @@ -31,7 +32,7 @@ struct prng_parm { static int check_prng(void) { if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) { - sclp_early_printk("KASLR disabled: CPU has no PRNG\n"); + boot_warn("KASLR disabled: CPU has no PRNG\n"); return 0; } if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) @@ -42,7 +43,7 @@ static int check_prng(void) return PRNG_MODE_TDES; } -static unsigned long get_random(unsigned long limit) +int get_random(unsigned long limit, unsigned long *value) { struct prng_parm prng = { /* initial parameter block for tdes mode, copied from libica */ @@ -75,7 +76,7 @@ static unsigned long get_random(unsigned long limit) *(unsigned long *) prng.parm_block ^= seed; for (i = 0; i < 16; i++) { cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block, - (char *) entropy, (char *) entropy, + (u8 *) entropy, (u8 *) entropy, sizeof(entropy)); memcpy(prng.parm_block, entropy, sizeof(entropy)); } @@ -84,87 +85,114 @@ static unsigned long get_random(unsigned long limit) (u8 *) &random, sizeof(random)); break; default: - random = 0; + return -1; } - return random % limit; + *value = random % limit; + return 0; } -unsigned long get_random_base(unsigned long safe_addr) +static void sort_reserved_ranges(struct reserved_range *res, unsigned long size) { - unsigned long memory_limit = memory_end_set ? memory_end : 0; - unsigned long base, start, end, kernel_size; - unsigned long block_sum, offset; - unsigned long kasan_needs; - int i; + struct reserved_range tmp; + int i, j; - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE) { - if (safe_addr < INITRD_START + INITRD_SIZE) - safe_addr = INITRD_START + INITRD_SIZE; + for (i = 1; i < size; i++) { + tmp = res[i]; + for (j = i - 1; j >= 0 && res[j].start > tmp.start; j--) + res[j + 1] = res[j]; + res[j + 1] = tmp; } - safe_addr = ALIGN(safe_addr, THREAD_SIZE); +} - if ((IS_ENABLED(CONFIG_KASAN))) { - /* - * Estimate kasan memory requirements, which it will reserve - * at the very end of available physical memory. To estimate - * that, we take into account that kasan would require - * 1/8 of available physical memory (for shadow memory) + - * creating page tables for the whole memory + shadow memory - * region (1 + 1/8). 
To keep page tables estimates simple take - * the double of combined ptes size. - */ - memory_limit = get_mem_detect_end(); - if (memory_end_set && memory_limit > memory_end) - memory_limit = memory_end; +static unsigned long iterate_valid_positions(unsigned long size, unsigned long align, + unsigned long _min, unsigned long _max, + struct reserved_range *res, size_t res_count, + bool pos_count, unsigned long find_pos) +{ + unsigned long start, end, tmp_end, range_pos, pos = 0; + struct reserved_range *res_end = res + res_count; + struct reserved_range *skip_res; + int i; - /* for shadow memory */ - kasan_needs = memory_limit / 8; - /* for paging structures */ - kasan_needs += (memory_limit + kasan_needs) / PAGE_SIZE / - _PAGE_ENTRIES * _PAGE_TABLE_SIZE * 2; - memory_limit -= kasan_needs; - } + align = max(align, 8UL); + _min = round_up(_min, align); + for_each_physmem_usable_range(i, &start, &end) { + if (_min >= end) + continue; + start = round_up(start, align); + if (start >= _max) + break; + start = max(_min, start); + end = min(_max, end); - kernel_size = vmlinux.image_size + vmlinux.bss_size; - block_sum = 0; - for_each_mem_detect_block(i, &start, &end) { - if (memory_limit) { - if (start >= memory_limit) + while (start + size <= end) { + /* skip reserved ranges below the start */ + while (res && res->end <= start) { + res++; + if (res >= res_end) + res = NULL; + } + skip_res = NULL; + tmp_end = end; + /* has intersecting reserved range */ + if (res && res->start < end) { + skip_res = res; + tmp_end = res->start; + } + if (start + size <= tmp_end) { + range_pos = (tmp_end - start - size) / align + 1; + if (pos_count) { + pos += range_pos; + } else { + if (range_pos >= find_pos) + return start + (find_pos - 1) * align; + find_pos -= range_pos; + } + } + if (!skip_res) break; - if (end > memory_limit) - end = memory_limit; + start = round_up(skip_res->end, align); } - if (end - start < kernel_size) - continue; - block_sum += end - start - kernel_size; - } - if (!block_sum) { - sclp_early_printk("KASLR disabled: not enough memory\n"); - return 0; } - base = get_random(block_sum); - if (base == 0) + return pos_count ? pos : 0; +} + +/* + * Two types of decompressor memory allocations/reserves are considered + * differently. + * + * "Static" or "single" allocations are done via physmem_alloc_range() and + * physmem_reserve(), and they are listed in physmem_info.reserved[]. Each + * type of "static" allocation can only have one allocation per type and + * cannot have chains. + * + * On the other hand, "dynamic" or "repetitive" allocations are done via + * physmem_alloc_or_die(). These allocations are tightly packed together + * top down from the end of online memory. physmem_alloc_pos represents + * current position where those allocations start. + * + * Functions randomize_within_range() and iterate_valid_positions() + * only consider "dynamic" allocations by never looking above + * physmem_alloc_pos. "Static" allocations, however, are explicitly + * considered by checking the "res" (reserves) array. The first + * reserved_range of a "dynamic" allocation may also be checked along the + * way, but it will always be above the maximum value anyway. 
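+ *
+ * Illustrative example: with a single usable range 0x200000-0x800000, no
+ * intersecting reserved ranges and size = align = 0x100000, the counting
+ * pass (pos_count == true) yields
+ * (0x800000 - 0x200000 - 0x100000) / 0x100000 + 1 = 6 valid positions
+ * (0x200000, 0x300000, ..., 0x700000); the selection pass then returns
+ * the find_pos-th of those six.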
+ */ +unsigned long randomize_within_range(unsigned long size, unsigned long align, + unsigned long min, unsigned long max) +{ + struct reserved_range res[RR_MAX]; + unsigned long max_pos, pos; + + memcpy(res, physmem_info.reserved, sizeof(res)); + sort_reserved_ranges(res, ARRAY_SIZE(res)); + max = min(max, get_physmem_alloc_pos()); + + max_pos = iterate_valid_positions(size, align, min, max, res, ARRAY_SIZE(res), true, 0); + if (!max_pos) return 0; - if (base < safe_addr) - base = safe_addr; - block_sum = offset = 0; - for_each_mem_detect_block(i, &start, &end) { - if (memory_limit) { - if (start >= memory_limit) - break; - if (end > memory_limit) - end = memory_limit; - } - if (end - start < kernel_size) - continue; - block_sum += end - start - kernel_size; - if (base <= block_sum) { - base = start + base - offset; - base = ALIGN_DOWN(base, THREAD_SIZE); - break; - } - offset = block_sum; - } - return base; + if (get_random(max_pos, &pos)) + return 0; + return iterate_valid_positions(size, align, min, max, res, ARRAY_SIZE(res), false, pos + 1); } diff --git a/arch/s390/boot/kmsan.c b/arch/s390/boot/kmsan.c new file mode 100644 index 000000000000..e7b3ac48143e --- /dev/null +++ b/arch/s390/boot/kmsan.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kmsan-checks.h> + +void kmsan_unpoison_memory(const void *address, size_t size) +{ +} diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c deleted file mode 100644 index 62e7c13ce85c..000000000000 --- a/arch/s390/boot/mem_detect.c +++ /dev/null @@ -1,175 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/errno.h> -#include <linux/init.h> -#include <asm/sclp.h> -#include <asm/sections.h> -#include <asm/mem_detect.h> -#include <asm/sparsemem.h> -#include "compressed/decompressor.h" -#include "boot.h" - -unsigned long __bootdata(max_physmem_end); -struct mem_detect_info __bootdata(mem_detect); - -/* up to 256 storage elements, 1020 subincrements each */ -#define ENTRIES_EXTENDED_MAX \ - (256 * (1020 / 2) * sizeof(struct mem_detect_block)) - -/* - * To avoid corrupting old kernel memory during dump, find lowest memory - * chunk possible either right after the kernel end (decompressed kernel) or - * after initrd (if it is present and there is no hole between the kernel end - * and initrd) - */ -static void *mem_detect_alloc_extended(void) -{ - unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64)); - - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE && - INITRD_START < offset + ENTRIES_EXTENDED_MAX) - offset = ALIGN(INITRD_START + INITRD_SIZE, sizeof(u64)); - - return (void *)offset; -} - -static struct mem_detect_block *__get_mem_detect_block_ptr(u32 n) -{ - if (n < MEM_INLINED_ENTRIES) - return &mem_detect.entries[n]; - if (unlikely(!mem_detect.entries_extended)) - mem_detect.entries_extended = mem_detect_alloc_extended(); - return &mem_detect.entries_extended[n - MEM_INLINED_ENTRIES]; -} - -/* - * sequential calls to add_mem_detect_block with adjacent memory areas - * are merged together into single memory block. 
- */ -void add_mem_detect_block(u64 start, u64 end) -{ - struct mem_detect_block *block; - - if (mem_detect.count) { - block = __get_mem_detect_block_ptr(mem_detect.count - 1); - if (block->end == start) { - block->end = end; - return; - } - } - - block = __get_mem_detect_block_ptr(mem_detect.count); - block->start = start; - block->end = end; - mem_detect.count++; -} - -static int __diag260(unsigned long rx1, unsigned long rx2) -{ - register unsigned long _rx1 asm("2") = rx1; - register unsigned long _rx2 asm("3") = rx2; - register unsigned long _ry asm("4") = 0x10; /* storage configuration */ - int rc = -1; /* fail */ - unsigned long reg1, reg2; - psw_t old = S390_lowcore.program_new_psw; - - asm volatile( - " epsw %0,%1\n" - " st %0,%[psw_pgm]\n" - " st %1,%[psw_pgm]+4\n" - " larl %0,1f\n" - " stg %0,%[psw_pgm]+8\n" - " diag %[rx],%[ry],0x260\n" - " ipm %[rc]\n" - " srl %[rc],28\n" - "1:\n" - : "=&d" (reg1), "=&a" (reg2), - [psw_pgm] "=Q" (S390_lowcore.program_new_psw), - [rc] "+&d" (rc), [ry] "+d" (_ry) - : [rx] "d" (_rx1), "d" (_rx2) - : "cc", "memory"); - S390_lowcore.program_new_psw = old; - return rc == 0 ? _ry : -1; -} - -static int diag260(void) -{ - int rc, i; - - struct { - unsigned long start; - unsigned long end; - } storage_extents[8] __aligned(16); /* VM supports up to 8 extends */ - - memset(storage_extents, 0, sizeof(storage_extents)); - rc = __diag260((unsigned long)storage_extents, sizeof(storage_extents)); - if (rc == -1) - return -1; - - for (i = 0; i < min_t(int, rc, ARRAY_SIZE(storage_extents)); i++) - add_mem_detect_block(storage_extents[i].start, storage_extents[i].end + 1); - return 0; -} - -static int tprot(unsigned long addr) -{ - unsigned long pgm_addr; - int rc = -EFAULT; - psw_t old = S390_lowcore.program_new_psw; - - S390_lowcore.program_new_psw.mask = __extract_psw(); - asm volatile( - " larl %[pgm_addr],1f\n" - " stg %[pgm_addr],%[psw_pgm_addr]\n" - " tprot 0(%[addr]),0\n" - " ipm %[rc]\n" - " srl %[rc],28\n" - "1:\n" - : [pgm_addr] "=&d"(pgm_addr), - [psw_pgm_addr] "=Q"(S390_lowcore.program_new_psw.addr), - [rc] "+&d"(rc) - : [addr] "a"(addr) - : "cc", "memory"); - S390_lowcore.program_new_psw = old; - return rc; -} - -static void search_mem_end(void) -{ - unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */ - unsigned long offset = 0; - unsigned long pivot; - - while (range > 1) { - range >>= 1; - pivot = offset + range; - if (!tprot(pivot << 20)) - offset = pivot; - } - - add_mem_detect_block(0, (offset + 1) << 20); -} - -void detect_memory(void) -{ - sclp_early_get_memsize(&max_physmem_end); - - if (!sclp_early_read_storage_info()) { - mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO; - return; - } - - if (!diag260()) { - mem_detect.info_source = MEM_DETECT_DIAG260; - return; - } - - if (max_physmem_end) { - add_mem_detect_block(0, max_physmem_end); - mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO; - return; - } - - search_mem_end(); - mem_detect.info_source = MEM_DETECT_BIN_SEARCH; - max_physmem_end = get_mem_detect_end(); -} diff --git a/arch/s390/boot/pgm_check.c b/arch/s390/boot/pgm_check.c new file mode 100644 index 000000000000..fa621fa5bc02 --- /dev/null +++ b/arch/s390/boot/pgm_check.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/stdarg.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <asm/stacktrace.h> +#include <asm/boot_data.h> +#include <asm/lowcore.h> +#include <asm/setup.h> +#include <asm/sclp.h> +#include <asm/uv.h> +#include "boot.h" + +void 
print_stacktrace(unsigned long sp) +{ + struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start, + (unsigned long)_stack_end }; + bool first = true; + + boot_emerg("Call Trace:\n"); + while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) { + struct stack_frame *sf = (struct stack_frame *)sp; + + if (first) + boot_emerg("(sp:%016lx [<%016lx>] %pS)\n", sp, sf->gprs[8], (void *)sf->gprs[8]); + else + boot_emerg(" sp:%016lx [<%016lx>] %pS\n", sp, sf->gprs[8], (void *)sf->gprs[8]); + if (sf->back_chain <= sp) + break; + sp = sf->back_chain; + first = false; + } +} + +extern struct exception_table_entry __start___ex_table[]; +extern struct exception_table_entry __stop___ex_table[]; + +static inline unsigned long extable_insn(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} + +static bool ex_handler(struct pt_regs *regs) +{ + const struct exception_table_entry *ex; + + for (ex = __start___ex_table; ex < __stop___ex_table; ex++) { + if (extable_insn(ex) != regs->psw.addr) + continue; + if (ex->type != EX_TYPE_FIXUP) + return false; + regs->psw.addr = extable_fixup(ex); + return true; + } + return false; +} + +void do_pgm_check(struct pt_regs *regs) +{ + struct psw_bits *psw = &psw_bits(regs->psw); + unsigned long *gpregs = regs->gprs; + + if (ex_handler(regs)) + return; + if (bootdebug) + boot_rb_dump(); + boot_emerg("Linux version %s\n", kernel_version); + if (!is_prot_virt_guest() && early_command_line[0]) + boot_emerg("Kernel command line: %s\n", early_command_line); + boot_emerg("Kernel fault: interruption code %04x ilc:%d\n", + regs->int_code & 0xffff, regs->int_code >> 17); + if (kaslr_enabled()) { + boot_emerg("Kernel random base: %lx\n", __kaslr_offset); + boot_emerg("Kernel random base phys: %lx\n", __kaslr_offset_phys); + } + boot_emerg("PSW : %016lx %016lx (%pS)\n", + regs->psw.mask, regs->psw.addr, (void *)regs->psw.addr); + boot_emerg(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", + psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, + psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, psw->eaba); + boot_emerg("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); + print_stacktrace(gpregs[15]); + boot_emerg("Last Breaking-Event-Address:\n"); + boot_emerg(" [<%016lx>] %pS\n", regs->last_break, (void *)regs->last_break); + /* Convert to disabled wait PSW */ + psw->io = 0; + psw->ext = 0; + psw->wait = 1; +} diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c deleted file mode 100644 index 83b5b7915c32..000000000000 --- a/arch/s390/boot/pgm_check_info.c +++ /dev/null @@ -1,90 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> -#include <linux/string.h> -#include <asm/lowcore.h> -#include <asm/sclp.h> -#include "boot.h" - -const char hex_asc[] = "0123456789abcdef"; - -#define add_val_as_hex(dst, val) \ - __add_val_as_hex(dst, (const unsigned char *)&val, sizeof(val)) - -static char *__add_val_as_hex(char *dst, const unsigned char *src, size_t count) -{ - while (count--) - dst = hex_byte_pack(dst, *src++); - return dst; -} - -static char *add_str(char *dst, char *src) -{ - strcpy(dst, src); - return 
dst + strlen(dst); -} - -void print_pgm_check_info(void) -{ - struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area); - unsigned short ilc = S390_lowcore.pgm_ilc >> 1; - char buf[256]; - int row, col; - char *p; - - add_str(buf, "Linux version "); - strlcat(buf, kernel_version, sizeof(buf)); - sclp_early_printk(buf); - - p = add_str(buf, "Kernel fault: interruption code "); - p = add_val_as_hex(buf + strlen(buf), S390_lowcore.pgm_code); - p = add_str(p, " ilc:"); - *p++ = hex_asc_lo(ilc); - add_str(p, "\n"); - sclp_early_printk(buf); - - p = add_str(buf, "PSW : "); - p = add_val_as_hex(p, S390_lowcore.psw_save_area.mask); - p = add_str(p, " "); - p = add_val_as_hex(p, S390_lowcore.psw_save_area.addr); - add_str(p, "\n"); - sclp_early_printk(buf); - - p = add_str(buf, " R:"); - *p++ = hex_asc_lo(psw->per); - p = add_str(p, " T:"); - *p++ = hex_asc_lo(psw->dat); - p = add_str(p, " IO:"); - *p++ = hex_asc_lo(psw->io); - p = add_str(p, " EX:"); - *p++ = hex_asc_lo(psw->ext); - p = add_str(p, " Key:"); - *p++ = hex_asc_lo(psw->key); - p = add_str(p, " M:"); - *p++ = hex_asc_lo(psw->mcheck); - p = add_str(p, " W:"); - *p++ = hex_asc_lo(psw->wait); - p = add_str(p, " P:"); - *p++ = hex_asc_lo(psw->pstate); - p = add_str(p, " AS:"); - *p++ = hex_asc_lo(psw->as); - p = add_str(p, " CC:"); - *p++ = hex_asc_lo(psw->cc); - p = add_str(p, " PM:"); - *p++ = hex_asc_lo(psw->pm); - p = add_str(p, " RI:"); - *p++ = hex_asc_lo(psw->ri); - p = add_str(p, " EA:"); - *p++ = hex_asc_lo(psw->eaba); - add_str(p, "\n"); - sclp_early_printk(buf); - - for (row = 0; row < 4; row++) { - p = add_str(buf, row == 0 ? "GPRS:" : " "); - for (col = 0; col < 4; col++) { - p = add_str(p, " "); - p = add_val_as_hex(p, S390_lowcore.gpregs_save_area[row * 4 + col]); - } - add_str(p, "\n"); - sclp_early_printk(buf); - } -} diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c new file mode 100644 index 000000000000..45e3d057cfaa --- /dev/null +++ b/arch/s390/boot/physmem_info.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "physmem: " fmt +#include <linux/processor.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <asm/physmem_info.h> +#include <asm/stacktrace.h> +#include <asm/boot_data.h> +#include <asm/sparsemem.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/sclp.h> +#include <asm/asm.h> +#include <asm/uv.h> +#include "decompressor.h" +#include "boot.h" + +struct physmem_info __bootdata(physmem_info); +static unsigned int physmem_alloc_ranges; +static unsigned long physmem_alloc_pos; + +/* up to 256 storage elements, 1020 subincrements each */ +#define ENTRIES_EXTENDED_MAX \ + (256 * (1020 / 2) * sizeof(struct physmem_range)) + +static struct physmem_range *__get_physmem_range_ptr(u32 n) +{ + if (n < MEM_INLINED_ENTRIES) + return &physmem_info.online[n]; + if (unlikely(!physmem_info.online_extended)) { + physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range( + RR_MEM_DETECT_EXT, ENTRIES_EXTENDED_MAX, sizeof(long), 0, + physmem_alloc_pos, true); + } + return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES]; +} + +/* + * sequential calls to add_physmem_online_range with adjacent memory ranges + * are merged together into single memory range. 
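+ *
+ * For example (illustrative): add_physmem_online_range(0, 0x10000000)
+ * followed by add_physmem_online_range(0x10000000, 0x20000000) leaves a
+ * single merged range 0x0-0x20000000 in physmem_info.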
+ */
+void add_physmem_online_range(u64 start, u64 end)
+{
+	struct physmem_range *range;
+
+	if (physmem_info.range_count) {
+		range = __get_physmem_range_ptr(physmem_info.range_count - 1);
+		if (range->end == start) {
+			range->end = end;
+			return;
+		}
+	}
+
+	range = __get_physmem_range_ptr(physmem_info.range_count);
+	range->start = start;
+	range->end = end;
+	physmem_info.range_count++;
+}
+
+static int __diag260(unsigned long rx1, unsigned long rx2)
+{
+	union register_pair rx;
+	int cc, exception;
+	unsigned long ry;
+
+	rx.even = rx1;
+	rx.odd = rx2;
+	ry = 0x10; /* storage configuration */
+	exception = 1;
+	asm_inline volatile(
+		"	diag	%[rx],%[ry],0x260\n"
+		"0:	lhi	%[exc],0\n"
+		"1:\n"
+		CC_IPM(cc)
+		EX_TABLE(0b, 1b)
+		: CC_OUT(cc, cc), [exc] "+d" (exception), [ry] "+d" (ry)
+		: [rx] "d" (rx.pair)
+		: CC_CLOBBER_LIST("memory"));
+	cc = exception ? -1 : CC_TRANSFORM(cc);
+	return cc == 0 ? ry : -1;
+}
+
+static int diag260(void)
+{
+	int rc, i;
+
+	struct {
+		unsigned long start;
+		unsigned long end;
+	} storage_extents[8] __aligned(16); /* VM supports up to 8 extents */
+
+	memset(storage_extents, 0, sizeof(storage_extents));
+	rc = __diag260((unsigned long)storage_extents, sizeof(storage_extents));
+	if (rc == -1)
+		return -1;
+
+	for (i = 0; i < min_t(int, rc, ARRAY_SIZE(storage_extents)); i++)
+		add_physmem_online_range(storage_extents[i].start, storage_extents[i].end + 1);
+	return 0;
+}
+
+#define DIAG500_SC_STOR_LIMIT 4
+
+static int diag500_storage_limit(unsigned long *max_physmem_end)
+{
+	unsigned long storage_limit;
+
+	asm_inline volatile(
+		"	lghi	%%r1,%[subcode]\n"
+		"	lghi	%%r2,0\n"
+		"	diag	%%r2,%%r4,0x500\n"
+		"0:	lgr	%[slimit],%%r2\n"
+		EX_TABLE(0b, 0b)
+		: [slimit] "=d" (storage_limit)
+		: [subcode] "i" (DIAG500_SC_STOR_LIMIT)
+		: "memory", "1", "2");
+	if (!storage_limit)
+		return -EINVAL;
+	/* Convert inclusive end to exclusive end */
+	*max_physmem_end = storage_limit + 1;
+	return 0;
+}
+
+static int tprot(unsigned long addr)
+{
+	int cc, exception;
+
+	exception = 1;
+	asm_inline volatile(
+		"	tprot	0(%[addr]),0\n"
+		"0:	lhi	%[exc],0\n"
+		"1:\n"
+		CC_IPM(cc)
+		EX_TABLE(0b, 1b)
+		: CC_OUT(cc, cc), [exc] "+d" (exception)
+		: [addr] "a" (addr)
+		: CC_CLOBBER_LIST("memory"));
+	cc = exception ?
-EFAULT : CC_TRANSFORM(cc); + return cc; +} + +static unsigned long search_mem_end(void) +{ + unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */ + unsigned long offset = 0; + unsigned long pivot; + + while (range > 1) { + range >>= 1; + pivot = offset + range; + if (!tprot(pivot << 20)) + offset = pivot; + } + return (offset + 1) << 20; +} + +unsigned long detect_max_physmem_end(void) +{ + unsigned long max_physmem_end = 0; + + if (!diag500_storage_limit(&max_physmem_end)) { + physmem_info.info_source = MEM_DETECT_DIAG500_STOR_LIMIT; + } else if (!sclp_early_get_memsize(&max_physmem_end)) { + physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO; + } else { + max_physmem_end = search_mem_end(); + physmem_info.info_source = MEM_DETECT_BIN_SEARCH; + } + boot_debug("Max physical memory: 0x%016lx (info source: %s)\n", max_physmem_end, + get_physmem_info_source()); + return max_physmem_end; +} + +void detect_physmem_online_ranges(unsigned long max_physmem_end) +{ + unsigned long start, end; + int i; + + if (!sclp_early_read_storage_info()) { + physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO; + } else if (physmem_info.info_source == MEM_DETECT_DIAG500_STOR_LIMIT) { + unsigned long online_end; + + if (!sclp_early_get_memsize(&online_end)) { + physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO; + add_physmem_online_range(0, online_end); + } + } else if (!diag260()) { + physmem_info.info_source = MEM_DETECT_DIAG260; + } else if (max_physmem_end) { + add_physmem_online_range(0, max_physmem_end); + } + boot_debug("Online memory ranges (info source: %s):\n", get_physmem_info_source()); + for_each_physmem_online_range(i, &start, &end) + boot_debug(" online [%d]: 0x%016lx-0x%016lx\n", i, start, end); +} + +void physmem_set_usable_limit(unsigned long limit) +{ + physmem_info.usable = limit; + physmem_alloc_pos = limit; + boot_debug("Usable memory limit: 0x%016lx\n", limit); +} + +static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max) +{ + unsigned long start, end, total_mem = 0, total_reserved_mem = 0; + struct reserved_range *range; + enum reserved_range_type t; + int i; + + boot_emerg("Linux version %s\n", kernel_version); + if (!is_prot_virt_guest() && early_command_line[0]) + boot_emerg("Kernel command line: %s\n", early_command_line); + boot_emerg("Out of memory allocating %lu bytes 0x%lx aligned in range %lx:%lx\n", + size, align, min, max); + boot_emerg("Reserved memory ranges:\n"); + for_each_physmem_reserved_range(t, range, &start, &end) { + boot_emerg("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); + total_reserved_mem += end - start; + } + boot_emerg("Usable online memory ranges (info source: %s [%d]):\n", + get_physmem_info_source(), physmem_info.info_source); + for_each_physmem_usable_range(i, &start, &end) { + boot_emerg("%016lx %016lx\n", start, end); + total_mem += end - start; + } + boot_emerg("Usable online memory total: %lu Reserved: %lu Free: %lu\n", + total_mem, total_reserved_mem, + total_mem > total_reserved_mem ? 
total_mem - total_reserved_mem : 0); + print_stacktrace(current_frame_address()); + boot_emerg(" -- System halted\n"); + disabled_wait(); +} + +static void _physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +{ + physmem_info.reserved[type].start = addr; + physmem_info.reserved[type].end = addr + size; +} + +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +{ + _physmem_reserve(type, addr, size); + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Reserve:", addr, addr + size, + get_rr_type_name(type)); +} + +void physmem_free(enum reserved_range_type type) +{ + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Free:", physmem_info.reserved[type].start, + physmem_info.reserved[type].end, get_rr_type_name(type)); + physmem_info.reserved[type].start = 0; + physmem_info.reserved[type].end = 0; +} + +static bool __physmem_alloc_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start) +{ + unsigned long res_addr, res_size; + int t; + + for (t = 0; t < RR_MAX; t++) { + if (!get_physmem_reserved(t, &res_addr, &res_size)) + continue; + if (intersects(addr, size, res_addr, res_size)) { + *intersection_start = res_addr; + return true; + } + } + return ipl_report_certs_intersects(addr, size, intersection_start); +} + +static unsigned long __physmem_alloc_range(unsigned long size, unsigned long align, + unsigned long min, unsigned long max, + unsigned int from_ranges, unsigned int *ranges_left, + bool die_on_oom) +{ + unsigned int nranges = from_ranges ?: physmem_info.range_count; + unsigned long range_start, range_end; + unsigned long intersection_start; + unsigned long addr, pos = max; + + align = max(align, 8UL); + while (nranges) { + __get_physmem_range(nranges - 1, &range_start, &range_end, false); + pos = min(range_end, pos); + + if (round_up(min, align) + size > pos) + break; + addr = round_down(pos - size, align); + if (range_start > addr) { + nranges--; + continue; + } + if (__physmem_alloc_intersects(addr, size, &intersection_start)) { + pos = intersection_start; + continue; + } + + if (ranges_left) + *ranges_left = nranges; + return addr; + } + if (die_on_oom) + die_oom(size, align, min, max); + return 0; +} + +unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, + unsigned long align, unsigned long min, unsigned long max, + bool die_on_oom) +{ + unsigned long addr; + + max = min(max, physmem_alloc_pos); + addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom); + if (addr) + _physmem_reserve(type, addr, size); + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Alloc range:", addr, addr + size, + get_rr_type_name(type)); + return addr; +} + +unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, + unsigned long align, bool die_on_oom) +{ + struct reserved_range *range = &physmem_info.reserved[type]; + struct reserved_range *new_range = NULL; + unsigned int ranges_left; + unsigned long addr; + + addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges, + &ranges_left, die_on_oom); + if (!addr) + return 0; + /* if not a consecutive allocation of the same type or first allocation */ + if (range->start != addr + size) { + if (range->end) { + addr = __physmem_alloc_range(sizeof(struct reserved_range), 0, 0, + physmem_alloc_pos, physmem_alloc_ranges, + &ranges_left, true); + new_range = (struct reserved_range *)addr; + addr = __physmem_alloc_range(size, align, 0, addr, ranges_left, + &ranges_left, 
die_on_oom); + if (!addr) + return 0; + *new_range = *range; + range->chain = new_range; + } + range->end = addr + size; + } + if (type != RR_VMEM) { + boot_debug("%-14s 0x%016lx-0x%016lx %-20s align 0x%lx split %d\n", "Alloc topdown:", + addr, addr + size, get_rr_type_name(type), align, !!new_range); + } + range->start = addr; + physmem_alloc_pos = addr; + physmem_alloc_ranges = ranges_left; + return addr; +} + +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align) +{ + return physmem_alloc(type, size, align, true); +} + +unsigned long get_physmem_alloc_pos(void) +{ + return physmem_alloc_pos; +} + +void dump_physmem_reserved(void) +{ + struct reserved_range *range; + enum reserved_range_type t; + unsigned long start, end; + + boot_debug("Reserved memory ranges:\n"); + for_each_physmem_reserved_range(t, range, &start, &end) { + if (end) { + boot_debug("%-14s 0x%016lx-0x%016lx @%012lx chain %012lx\n", + get_rr_type_name(t), start, end, (unsigned long)range, + (unsigned long)range->chain); + } + } +} diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c new file mode 100644 index 000000000000..8cf6331bc060 --- /dev/null +++ b/arch/s390/boot/printk.c @@ -0,0 +1,298 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/stdarg.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <asm/stacktrace.h> +#include <asm/boot_data.h> +#include <asm/sections.h> +#include <asm/lowcore.h> +#include <asm/setup.h> +#include <asm/timex.h> +#include <asm/sclp.h> +#include <asm/uv.h> +#include "boot.h" + +int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT; +bool boot_ignore_loglevel; +char __bootdata(boot_rb)[PAGE_SIZE * 2]; +bool __bootdata(boot_earlyprintk); +size_t __bootdata(boot_rb_off); +char __bootdata(bootdebug_filter)[128]; +bool __bootdata(bootdebug); + +static void boot_rb_add(const char *str, size_t len) +{ + /* leave double '\0' in the end */ + size_t avail = sizeof(boot_rb) - boot_rb_off - 1; + + /* store strings separated by '\0' */ + if (len + 1 > avail) + boot_rb_off = 0; + strcpy(boot_rb + boot_rb_off, str); + boot_rb_off += len + 1; +} + +static void print_rb_entry(const char *str) +{ + sclp_early_printk(printk_skip_level(str)); +} + +static bool debug_messages_printed(void) +{ + return boot_earlyprintk && (boot_ignore_loglevel || boot_console_loglevel > LOGLEVEL_DEBUG); +} + +void boot_rb_dump(void) +{ + if (debug_messages_printed()) + return; + sclp_early_printk("Boot messages ring buffer:\n"); + boot_rb_foreach(print_rb_entry); +} + +const char hex_asc[] = "0123456789abcdef"; + +static char *as_hex(char *dst, unsigned long val, int pad) +{ + char *p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); + + for (*p-- = '\0'; p >= dst; val >>= 4) + *p-- = hex_asc[val & 0x0f]; + return dst; +} + +#define MAX_NUMLEN 21 +static char *as_dec(char *buf, unsigned long val, bool is_signed) +{ + bool negative = false; + char *p = buf + MAX_NUMLEN; + + if (is_signed && (long)val < 0) { + val = (val == LONG_MIN ? 
LONG_MIN : -(long)val); + negative = true; + } + + *--p = '\0'; + do { + *--p = '0' + (val % 10); + val /= 10; + } while (val); + + if (negative) + *--p = '-'; + return p; +} + +static ssize_t strpad(char *dst, size_t dst_size, const char *src, + int _pad, bool zero_pad, bool decimal) +{ + ssize_t len = strlen(src), pad = _pad; + char *p = dst; + + if (max(len, abs(pad)) >= dst_size) + return -E2BIG; + + if (pad > len) { + if (decimal && zero_pad && *src == '-') { + *p++ = '-'; + src++; + len--; + pad--; + } + memset(p, zero_pad ? '0' : ' ', pad - len); + p += pad - len; + } + memcpy(p, src, len); + p += len; + if (pad < 0 && -pad > len) { + memset(p, ' ', -pad - len); + p += -pad - len; + } + *p = '\0'; + return p - dst; +} + +static char *symstart(char *p) +{ + while (*p) + p--; + return p + 1; +} + +static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len) +{ + /* symbol entries are in a form "10000 c4 startup\0" */ + char *a = _decompressor_syms_start; + char *b = _decompressor_syms_end; + unsigned long start; + unsigned long size; + char *pivot; + char *endp; + + while (a < b) { + pivot = symstart(a + (b - a) / 2); + start = simple_strtoull(pivot, &endp, 16); + size = simple_strtoull(endp + 1, &endp, 16); + if (ip < start) { + b = pivot; + continue; + } + if (ip > start + size) { + a = pivot + strlen(pivot) + 1; + continue; + } + *off = ip - start; + *len = size; + return endp + 1; + } + return NULL; +} + +#define MAX_SYMLEN 64 +static noinline char *strsym(char *buf, void *ip) +{ + unsigned short off; + unsigned short len; + char *p; + + p = findsym((unsigned long)ip, &off, &len); + if (p) { + strncpy(buf, p, MAX_SYMLEN); + /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */ + p = buf + strnlen(buf, MAX_SYMLEN - 15); + strcpy(p, "+0x"); + as_hex(p + 3, off, 0); + strcat(p, "/0x"); + as_hex(p + strlen(p), len, 0); + } else { + as_hex(buf, (unsigned long)ip, 16); + } + return buf; +} + +static inline int printk_loglevel(const char *buf) +{ + if (buf[0] == KERN_SOH_ASCII && buf[1]) { + switch (buf[1]) { + case '0' ... '7': + return buf[1] - '0'; + } + } + return MESSAGE_LOGLEVEL_DEFAULT; +} + +static void boot_console_earlyprintk(const char *buf) +{ + int level = printk_loglevel(buf); + + /* always print emergency messages */ + if (level > LOGLEVEL_EMERG && !boot_earlyprintk) + return; + buf = printk_skip_level(buf); + /* print debug messages only when bootdebug is enabled */ + if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(skip_timestamp(buf)))) + return; + if (boot_ignore_loglevel || level < boot_console_loglevel) + sclp_early_printk(buf); +} + +static char *add_timestamp(char *buf) +{ +#ifdef CONFIG_PRINTK_TIME + unsigned long ns = tod_to_ns(__get_tod_clock_monotonic()); + char ts[MAX_NUMLEN]; + + *buf++ = '['; + buf += strpad(buf, MAX_NUMLEN, as_dec(ts, ns / NSEC_PER_SEC, 0), 5, 0, 0); + *buf++ = '.'; + buf += strpad(buf, MAX_NUMLEN, as_dec(ts, (ns % NSEC_PER_SEC) / NSEC_PER_USEC, 0), 6, 1, 0); + *buf++ = ']'; + *buf++ = ' '; +#endif + return buf; +} + +#define va_arg_len_type(args, lenmod, typemod) \ + ((lenmod == 'l') ? va_arg(args, typemod long) : \ + (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) : \ + (lenmod == 'H') ? (typemod char)va_arg(args, typemod int) : \ + (lenmod == 'z') ? va_arg(args, typemod long) : \ + va_arg(args, typemod int)) + +int boot_printk(const char *fmt, ...) 
+{ + char buf[1024] = { 0 }; + char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */ + bool zero_pad, decimal; + char *strval, *p = buf; + char valbuf[MAX(MAX_SYMLEN, MAX_NUMLEN)]; + va_list args; + char lenmod; + ssize_t len; + int pad; + + *p++ = KERN_SOH_ASCII; + *p++ = printk_get_level(fmt) ?: '0' + MESSAGE_LOGLEVEL_DEFAULT; + p = add_timestamp(p); + fmt = printk_skip_level(fmt); + + va_start(args, fmt); + for (; p < end && *fmt; fmt++) { + if (*fmt != '%') { + *p++ = *fmt; + continue; + } + if (*++fmt == '%') { + *p++ = '%'; + continue; + } + zero_pad = (*fmt == '0'); + pad = simple_strtol(fmt, (char **)&fmt, 10); + lenmod = (*fmt == 'h' || *fmt == 'l' || *fmt == 'z') ? *fmt++ : 0; + if (lenmod == 'h' && *fmt == 'h') { + lenmod = 'H'; + fmt++; + } + decimal = false; + switch (*fmt) { + case 's': + if (lenmod) + goto out; + strval = va_arg(args, char *); + zero_pad = false; + break; + case 'p': + if (*++fmt != 'S' || lenmod) + goto out; + strval = strsym(valbuf, va_arg(args, void *)); + zero_pad = false; + break; + case 'd': + case 'i': + strval = as_dec(valbuf, va_arg_len_type(args, lenmod, signed), 1); + decimal = true; + break; + case 'u': + strval = as_dec(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); + break; + case 'x': + strval = as_hex(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); + break; + default: + goto out; + } + len = strpad(p, end - p, strval, pad, zero_pad, decimal); + if (len == -E2BIG) + break; + p += len; + } +out: + va_end(args); + len = strlen(buf); + if (len) { + boot_rb_add(buf, len); + boot_console_earlyprintk(buf); + } + return len; +} diff --git a/arch/s390/boot/sclp_early_core.c b/arch/s390/boot/sclp_early_core.c index 5a19fd7020b5..6f30646afbd0 100644 --- a/arch/s390/boot/sclp_early_core.c +++ b/arch/s390/boot/sclp_early_core.c @@ -1,2 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 +#include "boot.h" #include "../../../drivers/s390/char/sclp_early_core.c" + +/* SCLP early buffer must stay page-aligned and below 2GB */ +static char __sclp_early_sccb[EXT_SCCB_READ_SCP] __aligned(PAGE_SIZE); + +void sclp_early_setup_buffer(void) +{ + sclp_early_set_buffer(&__sclp_early_sccb); +} diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 3b3a11f95269..06316fb8e0fa 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,82 +1,205 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "startup: " fmt #include <linux/string.h> #include <linux/elf.h> +#include <asm/page-states.h> +#include <asm/boot_data.h> +#include <asm/extmem.h> #include <asm/sections.h> +#include <asm/maccess.h> +#include <asm/machine.h> +#include <asm/sysinfo.h> +#include <asm/cpu_mf.h> #include <asm/setup.h> +#include <asm/timex.h> +#include <asm/kasan.h> #include <asm/kexec.h> #include <asm/sclp.h> #include <asm/diag.h> #include <asm/uv.h> -#include "compressed/decompressor.h" +#include <asm/abs_lowcore.h> +#include <asm/physmem_info.h> +#include "decompressor.h" #include "boot.h" +#include "uv.h" -extern char __boot_data_start[], __boot_data_end[]; -extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; -unsigned long __bootdata_preserved(__kaslr_offset); +struct vm_layout __bootdata_preserved(vm_layout); +unsigned long __bootdata_preserved(__abs_lowcore); +unsigned long __bootdata_preserved(__memcpy_real_area); +pte_t *__bootdata_preserved(memcpy_real_ptep); +unsigned long __bootdata_preserved(VMALLOC_START); +unsigned long __bootdata_preserved(VMALLOC_END); +struct page *__bootdata_preserved(vmemmap); 
+unsigned long __bootdata_preserved(vmemmap_size); +unsigned long __bootdata_preserved(MODULES_VADDR); +unsigned long __bootdata_preserved(MODULES_END); +unsigned long __bootdata_preserved(max_mappable); +unsigned long __bootdata_preserved(page_noexec_mask); +unsigned long __bootdata_preserved(segment_noexec_mask); +unsigned long __bootdata_preserved(region_noexec_mask); +union tod_clock __bootdata_preserved(tod_clock_base); +u64 __bootdata_preserved(clock_comparator_max) = -1UL; -/* - * Some code and data needs to stay below 2 GB, even when the kernel would be - * relocated above 2 GB, because it has to use 31 bit addresses. - * Such code and data is part of the .dma section, and its location is passed - * over to the decompressed / relocated kernel via the .boot.preserved.data - * section. - */ -extern char _sdma[], _edma[]; -extern char _stext_dma[], _etext_dma[]; -extern struct exception_table_entry _start_dma_ex_table[]; -extern struct exception_table_entry _stop_dma_ex_table[]; -unsigned long __bootdata_preserved(__sdma) = __pa(&_sdma); -unsigned long __bootdata_preserved(__edma) = __pa(&_edma); -unsigned long __bootdata_preserved(__stext_dma) = __pa(&_stext_dma); -unsigned long __bootdata_preserved(__etext_dma) = __pa(&_etext_dma); -struct exception_table_entry * - __bootdata_preserved(__start_dma_ex_table) = _start_dma_ex_table; -struct exception_table_entry * - __bootdata_preserved(__stop_dma_ex_table) = _stop_dma_ex_table; - -int _diag210_dma(struct diag210 *addr); -int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode); -int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode); -void _diag0c_dma(struct hypfs_diag0c_entry *entry); -void _diag308_reset_dma(void); -struct diag_ops __bootdata_preserved(diag_dma_ops) = { - .diag210 = _diag210_dma, - .diag26c = _diag26c_dma, - .diag14 = _diag14_dma, - .diag0c = _diag0c_dma, - .diag308_reset = _diag308_reset_dma -}; -static struct diag210 _diag210_tmp_dma __section(.dma.data); -struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma; -void _swsusp_reset_dma(void); -unsigned long __bootdata_preserved(__swsusp_reset_dma) = __pa(_swsusp_reset_dma); +u64 __bootdata_preserved(stfle_fac_list[16]); +struct oldmem_data __bootdata_preserved(oldmem_data); void error(char *x) { - sclp_early_printk("\n\n"); - sclp_early_printk(x); - sclp_early_printk("\n\n -- System halted"); - + boot_emerg("%s\n", x); + boot_emerg(" -- System halted\n"); disabled_wait(); } +static char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); + +static void detect_machine_type(void) +{ + struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; + + /* Check current-configuration-level */ + if (stsi(NULL, 0, 0, 0) <= 2) { + set_machine_feature(MFEATURE_LPAR); + return; + } + /* Get virtual-machine cpu information. 
*/ + if (stsi(vmms, 3, 2, 2) || !vmms->count) + return; + /* Detect known hypervisors */ + if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) + set_machine_feature(MFEATURE_KVM); + else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) + set_machine_feature(MFEATURE_VM); +} + +static void detect_diag9c(void) +{ + unsigned int cpu; + int rc = 1; + + cpu = stap(); + asm_inline volatile( + " diag %[cpu],%%r0,0x9c\n" + "0: lhi %[rc],0\n" + "1:\n" + EX_TABLE(0b, 1b) + : [rc] "+d" (rc) + : [cpu] "d" (cpu) + : "cc", "memory"); + if (!rc) + set_machine_feature(MFEATURE_DIAG9C); +} + +static void reset_tod_clock(void) +{ + union tod_clock clk; + + if (store_tod_clock_ext_cc(&clk) == 0) + return; + /* TOD clock not running. Set the clock to Unix Epoch. */ + if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) + disabled_wait(); + memset(&tod_clock_base, 0, sizeof(tod_clock_base)); + tod_clock_base.tod = TOD_UNIX_EPOCH; + get_lowcore()->last_update_clock = TOD_UNIX_EPOCH; +} + +static void detect_facilities(void) +{ + if (cpu_has_edat1()) + local_ctl_set_bit(0, CR0_EDAT_BIT); + page_noexec_mask = -1UL; + segment_noexec_mask = -1UL; + region_noexec_mask = -1UL; + if (!cpu_has_nx()) { + page_noexec_mask &= ~_PAGE_NOEXEC; + segment_noexec_mask &= ~_SEGMENT_ENTRY_NOEXEC; + region_noexec_mask &= ~_REGION_ENTRY_NOEXEC; + } + if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) + set_machine_feature(MFEATURE_PCI_MIO); + reset_tod_clock(); + if (test_facility(139) && (tod_clock_base.tod >> 63)) { + /* Enable signed clock comparator comparisons */ + set_machine_feature(MFEATURE_SCC); + clock_comparator_max = -1UL >> 1; + local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT); + } + if (test_facility(50) && test_facility(73)) { + set_machine_feature(MFEATURE_TX); + local_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT); + } + if (cpu_has_vx()) + local_ctl_set_bit(0, CR0_VECTOR_BIT); +} + +static int cmma_test_essa(void) +{ + unsigned long tmp = 0; + int rc = 1; + + /* Test ESSA_GET_STATE */ + asm_inline volatile( + " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n" + "0: lhi %[rc],0\n" + "1:\n" + EX_TABLE(0b, 1b) + : [rc] "+d" (rc), [tmp] "+d" (tmp) + : [cmd] "i" (ESSA_GET_STATE) + : "cc", "memory"); + return rc; +} + +static void cmma_init(void) +{ + if (!cmma_flag) + return; + if (cmma_test_essa()) { + cmma_flag = 0; + return; + } + if (test_facility(147)) + cmma_flag = 2; +} + +static void setup_lpp(void) +{ + get_lowcore()->current_pid = 0; + get_lowcore()->lpp = LPP_MAGIC; + if (test_facility(40)) + lpp(&get_lowcore()->lpp); +} + #ifdef CONFIG_KERNEL_UNCOMPRESSED -unsigned long mem_safe_offset(void) +static unsigned long mem_safe_offset(void) +{ + return (unsigned long)_compressed_start; +} + +static void deploy_kernel(void *output) { - return vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size; + void *uncompressed_start = (void *)_compressed_start; + + if (output == uncompressed_start) + return; + memmove(output, uncompressed_start, vmlinux.image_size); + memset(uncompressed_start, 0, vmlinux.image_size); } #endif -static void rescue_initrd(unsigned long addr) +static void rescue_initrd(unsigned long min, unsigned long max) { + unsigned long old_addr, addr, size; + if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD)) return; - if (!INITRD_START || !INITRD_SIZE) + if (!get_physmem_reserved(RR_INITRD, &addr, &size)) return; - if (addr <= INITRD_START) + if (addr >= min && addr + size <= max) return; - memmove((void *)addr, (void *)INITRD_START, INITRD_SIZE); - INITRD_START = addr; + old_addr = addr; + 
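/*
+	 * The old RR_INITRD reservation is dropped before the new allocation
+	 * is made, so the fresh range may overlap the old initrd location;
+	 * the memmove() below (rather than memcpy()) keeps such an
+	 * overlapping copy safe.
+	 */
+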
physmem_free(RR_INITRD);
+	addr = physmem_alloc_or_die(RR_INITRD, size, 0);
+	memmove((void *)addr, (void *)old_addr, size);
 }
 
 static void copy_bootdata(void)
@@ -89,95 +212,418 @@ static void copy_bootdata(void)
 	memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start,
 	       vmlinux.bootdata_preserved_size);
 }
 
-static void handle_relocs(unsigned long offset)
+static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr,
+				unsigned long offset, unsigned long phys_offset)
+{
+	int *reloc;
+	long loc;
+
+	/* Adjust R_390_64 relocations */
+	for (reloc = (int *)__vmlinux_relocs_64_start; reloc < (int *)__vmlinux_relocs_64_end; reloc++) {
+		loc = (long)*reloc + phys_offset;
+		if (loc < min_addr || loc > max_addr)
+			error("64-bit relocation outside of kernel!\n");
+		*(u64 *)loc += offset;
+	}
+}
+
+static void kaslr_adjust_got(unsigned long offset)
 {
-	Elf64_Rela *rela_start, *rela_end, *rela;
-	int r_type, r_sym, rc;
-	Elf64_Addr loc, val;
-	Elf64_Sym *dynsym;
+	u64 *entry;
+
+	/*
+	 * Adjust GOT entries, except for ones for undefined weak symbols
+	 * that resolved to zero. This also skips the first three reserved
+	 * entries on s390x that are zero.
+	 */
+	for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) {
+		if (*entry)
+			*entry += offset;
+	}
+}
 
-	rela_start = (Elf64_Rela *) vmlinux.rela_dyn_start;
-	rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end;
-	dynsym = (Elf64_Sym *) vmlinux.dynsym_start;
-	for (rela = rela_start; rela < rela_end; rela++) {
-		loc = rela->r_offset + offset;
-		val = rela->r_addend;
-		r_sym = ELF64_R_SYM(rela->r_info);
-		if (r_sym) {
-			if (dynsym[r_sym].st_shndx != SHN_UNDEF)
-				val += dynsym[r_sym].st_value + offset;
+/*
+ * Merge information from several sources into a single ident_map_size value.
+ * "ident_map_size" represents the upper limit of physical memory we may ever
+ * reach. It might not be all online memory, but also include standby (offline)
+ * memory or memory areas reserved for other means (e.g., memory devices such as
+ * virtio-mem).
+ *
+ * "ident_map_size" could be lower than the actual standby/reserved or even
+ * online memory present, due to limiting factors. We should never go above
+ * this limit. It is the size of our identity mapping.
+ *
+ * Consider the following factors:
+ * 1. max_physmem_end - end of physical memory online, standby or reserved.
+ *    Always >= end of the last online memory range (get_physmem_online_end()).
+ * 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the
+ *    kernel is able to support.
+ * 3. "mem=" kernel command line option which limits physical memory usage.
+ * 4. OLDMEM_BASE which is a kdump memory limit when the kernel is executed as
+ *    crash kernel.
+ * 5. "hsa" size which is a memory limit when the kernel is executed during
+ *    zfcp/nvme dump.
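+ *
+ * For example (illustrative): booting with "mem=4G" on a machine whose
+ * online and standby memory ends at 16G caps ident_map_size at 4G, since
+ * the smallest of the factors above wins.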
+ */ +static void setup_ident_map_size(unsigned long max_physmem_end) +{ + unsigned long hsa_size; + + ident_map_size = max_physmem_end; + if (memory_limit) + ident_map_size = min(ident_map_size, memory_limit); + ident_map_size = min(ident_map_size, 1UL << MAX_PHYSMEM_BITS); + +#ifdef CONFIG_CRASH_DUMP + if (oldmem_data.start) { + __kaslr_enabled = 0; + ident_map_size = min(ident_map_size, oldmem_data.size); + boot_debug("kdump memory limit: 0x%016lx\n", oldmem_data.size); + } else if (ipl_block_valid && is_ipl_block_dump()) { + __kaslr_enabled = 0; + if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) { + ident_map_size = min(ident_map_size, hsa_size); + boot_debug("Stand-alone dump limit: 0x%016lx\n", hsa_size); + } + } +#endif + boot_debug("Identity map size: 0x%016lx\n", ident_map_size); +} + +#define FIXMAP_SIZE round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)) + +static unsigned long get_vmem_size(unsigned long identity_size, + unsigned long vmemmap_size, + unsigned long vmalloc_size, + unsigned long rte_size) +{ + unsigned long max_mappable, vsize; + + max_mappable = max(identity_size, MAX_DCSS_ADDR); + vsize = round_up(SZ_2G + max_mappable, rte_size) + + round_up(vmemmap_size, rte_size) + + FIXMAP_SIZE + MODULES_LEN + KASLR_LEN; + if (IS_ENABLED(CONFIG_KMSAN)) + vsize += MODULES_LEN * 2; + return size_add(vsize, vmalloc_size); +} + +static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) +{ + unsigned long vmemmap_start; + unsigned long kernel_start; + unsigned long asce_limit; + unsigned long rte_size; + unsigned long pages; + unsigned long vsize; + unsigned long vmax; + + pages = ident_map_size / PAGE_SIZE; + /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */ + vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page); + + /* choose kernel address space layout: 4 or 3 levels. */ + BUILD_BUG_ON(!IS_ALIGNED(TEXT_OFFSET, THREAD_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE)); + BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE); + vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE); + boot_debug("vmem size estimated: 0x%016lx\n", vsize); + if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE || + (vsize > _REGION2_SIZE && kaslr_enabled())) { + asce_limit = _REGION1_SIZE; + if (__NO_KASLR_END_KERNEL > _REGION2_SIZE) { + rte_size = _REGION2_SIZE; + vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION2_SIZE); } else { - /* - * 0 == undefined symbol table index (STN_UNDEF), - * used for R_390_RELATIVE, only add KASLR offset - */ - val += offset; + rte_size = _REGION3_SIZE; } - r_type = ELF64_R_TYPE(rela->r_info); - rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0); - if (rc) - error("Unknown relocation type"); + } else { + asce_limit = _REGION2_SIZE; + rte_size = _REGION3_SIZE; + } + + /* + * Forcing modules and vmalloc area under the ultravisor + * secure storage limit, so that any vmalloc allocation + * we do could be used to back secure guest storage. + * + * Assume the secure storage limit always exceeds _REGION2_SIZE, + * otherwise asce_limit and rte_size would have been adjusted. + */ + vmax = adjust_to_uv_max(asce_limit); + boot_debug("%d level paging 0x%016lx vmax\n", vmax == _REGION1_SIZE ? 
4 : 3, vmax); +#ifdef CONFIG_KASAN + BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START); + boot_debug("KASAN shadow area: 0x%016lx-0x%016lx\n", KASAN_SHADOW_START, KASAN_SHADOW_END); + /* force vmalloc and modules below kasan shadow */ + vmax = min(vmax, KASAN_SHADOW_START); +#endif + vsize = min(vsize, vmax); + if (kaslr_enabled()) { + unsigned long kernel_end, kaslr_len, slots, pos; + + kaslr_len = max(KASLR_LEN, vmax - vsize); + slots = DIV_ROUND_UP(kaslr_len - kernel_size, THREAD_SIZE); + if (get_random(slots, &pos)) + pos = 0; + kernel_end = vmax - pos * THREAD_SIZE; + kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE); + boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax); + boot_debug("kernel image: 0x%016lx-0x%016lx (kaslr)\n", kernel_start, + kernel_start + kernel_size); + } else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) { + kernel_start = round_down(vmax - kernel_size, THREAD_SIZE); + boot_debug("kernel image: 0x%016lx-0x%016lx (constrained)\n", kernel_start, + kernel_start + kernel_size); + } else { + kernel_start = __NO_KASLR_START_KERNEL; + boot_debug("kernel image: 0x%016lx-0x%016lx (nokaslr)\n", kernel_start, + kernel_start + kernel_size); + } + __kaslr_offset = kernel_start; + boot_debug("__kaslr_offset: 0x%016lx\n", __kaslr_offset); + + MODULES_END = round_down(kernel_start, _SEGMENT_SIZE); + MODULES_VADDR = MODULES_END - MODULES_LEN; + VMALLOC_END = MODULES_VADDR; + if (IS_ENABLED(CONFIG_KMSAN)) + VMALLOC_END -= MODULES_LEN * 2; + boot_debug("modules area: 0x%016lx-0x%016lx\n", MODULES_VADDR, MODULES_END); + + /* allow vmalloc area to occupy up to about 1/2 of the remaining virtual space */ + vsize = (VMALLOC_END - FIXMAP_SIZE) / 2; + vsize = round_down(vsize, _SEGMENT_SIZE); + vmalloc_size = min(vmalloc_size, vsize); + if (IS_ENABLED(CONFIG_KMSAN)) { + /* take 2/3 of vmalloc area for KMSAN shadow and origins */ + vmalloc_size = round_down(vmalloc_size / 3, _SEGMENT_SIZE); + VMALLOC_END -= vmalloc_size * 2; + } + VMALLOC_START = VMALLOC_END - vmalloc_size; + boot_debug("vmalloc area: 0x%016lx-0x%016lx\n", VMALLOC_START, VMALLOC_END); + + __memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE); + boot_debug("memcpy real area: 0x%016lx-0x%016lx\n", __memcpy_real_area, + __memcpy_real_area + MEMCPY_REAL_SIZE); + __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE, + sizeof(struct lowcore)); + boot_debug("abs lowcore: 0x%016lx-0x%016lx\n", __abs_lowcore, + __abs_lowcore + ABS_LOWCORE_MAP_SIZE); + + /* split remaining virtual space between 1:1 mapping & vmemmap array */ + pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page)); + pages = SECTION_ALIGN_UP(pages); + /* keep vmemmap_start aligned to a top level region table entry */ + vmemmap_start = round_down(__abs_lowcore - pages * sizeof(struct page), rte_size); + /* make sure identity map doesn't overlap with vmemmap */ + ident_map_size = min(ident_map_size, vmemmap_start); + vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page); + /* make sure vmemmap doesn't overlap with absolute lowcore area */ + if (vmemmap_start + vmemmap_size > __abs_lowcore) { + vmemmap_size = SECTION_ALIGN_DOWN(ident_map_size / PAGE_SIZE) * sizeof(struct page); + ident_map_size = vmemmap_size / sizeof(struct page) * PAGE_SIZE; } + vmemmap = (struct page *)vmemmap_start; + /* maximum address for which linear mapping could be created (DCSS, memory) */ + BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS)); + max_mappable = max(ident_map_size, MAX_DCSS_ADDR); + max_mappable = min(max_mappable, vmemmap_start); +#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE + __identity_base = round_down(vmemmap_start - max_mappable, rte_size); +#endif + boot_debug("identity map: 0x%016lx-0x%016lx\n", __identity_base, + __identity_base + ident_map_size); + + return asce_limit; +}
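An editorial sketch, not part of the patch: the KASLR branch above is plain slot arithmetic. With align standing in for THREAD_SIZE and rnd for the entropy that get_random() returns, the chosen virtual start address falls out as follows (all names are illustrative):

static unsigned long pick_kernel_start(unsigned long vmax, unsigned long kaslr_len,
				       unsigned long kernel_size, unsigned long align,
				       unsigned long rnd)
{
	unsigned long slots, pos, kernel_end;

	/* number of align-spaced positions the image can take, DIV_ROUND_UP() */
	slots = (kaslr_len - kernel_size + align - 1) / align;
	pos = rnd % slots;
	kernel_end = vmax - pos * align;
	/* keep the start address align-ed, like round_down() above */
	return (kernel_end - kernel_size) & ~(align - 1);
}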
-static void clear_bss_section(void) +/* + * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's. + */ +static void clear_bss_section(unsigned long kernel_start) +{ + memset((void *)kernel_start + vmlinux.image_size, 0, vmlinux.bss_size); +} + +/* + * Set vmalloc area size to an 8th of (potential) physical memory + * size, unless size has been set by kernel command line parameter. + */ +static void setup_vmalloc_size(void) +{ + unsigned long size; + + if (vmalloc_size_set) + return; + size = round_up(ident_map_size / 8, _SEGMENT_SIZE); + vmalloc_size = max(size, vmalloc_size); +} + +static void kaslr_adjust_vmlinux_info(long offset) { - memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size); + vmlinux.bootdata_off += offset; + vmlinux.bootdata_preserved_off += offset; + vmlinux.got_start += offset; + vmlinux.got_end += offset; + vmlinux.init_mm_off += offset; + vmlinux.swapper_pg_dir_off += offset; + vmlinux.invalid_pg_dir_off += offset; + vmlinux.alt_instructions += offset; + vmlinux.alt_instructions_end += offset; +#ifdef CONFIG_KASAN + vmlinux.kasan_early_shadow_page_off += offset; + vmlinux.kasan_early_shadow_pte_off += offset; + vmlinux.kasan_early_shadow_pmd_off += offset; + vmlinux.kasan_early_shadow_pud_off += offset; + vmlinux.kasan_early_shadow_p4d_off += offset; +#endif } void startup_kernel(void) { - unsigned long random_lma; + unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size; + unsigned long nokaslr_text_lma, text_lma = 0, amode31_lma = 0; + unsigned long kernel_size = TEXT_OFFSET + vmlinux_size; + unsigned long kaslr_large_page_offset; + unsigned long max_physmem_end; + unsigned long asce_limit; unsigned long safe_addr; - void *img; + psw_t psw; + setup_lpp(); store_ipl_parmblock(); - safe_addr = mem_safe_offset(); - safe_addr = read_ipl_report(safe_addr); uv_query_info(); - rescue_initrd(safe_addr); - sclp_early_read_info(); setup_boot_command_line(); parse_boot_command_line(); - setup_memory_end(); - detect_memory(); - - random_lma = __kaslr_offset = 0; - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { - random_lma = get_random_base(safe_addr); - if (random_lma) { - __kaslr_offset = random_lma - vmlinux.default_lma; - img = (void *)vmlinux.default_lma; - vmlinux.default_lma += __kaslr_offset; - vmlinux.entry += __kaslr_offset; - vmlinux.bootdata_off += __kaslr_offset; - vmlinux.bootdata_preserved_off += __kaslr_offset; - vmlinux.rela_dyn_start += __kaslr_offset; - vmlinux.rela_dyn_end += __kaslr_offset; - vmlinux.dynsym_start += __kaslr_offset; - } + + /* + * Non-randomized kernel physical start address must be _SEGMENT_SIZE + * aligned (see below). + */ + nokaslr_text_lma = ALIGN(mem_safe_offset(), _SEGMENT_SIZE); + safe_addr = PAGE_ALIGN(nokaslr_text_lma + vmlinux_size); + + /* + * Reserve decompressor memory together with decompression heap, + * buffer and memory which might be occupied by uncompressed kernel + * (if KASLR is off or failed). + */
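/*
 * Editorial example with hypothetical numbers, not part of the patch: with
 * mem_safe_offset() == 0x02000000 and vmlinux_size == 0x01800000,
 *
 *	nokaslr_text_lma = ALIGN(0x02000000, _SEGMENT_SIZE) = 0x02000000
 *	safe_addr        = PAGE_ALIGN(0x02000000 + 0x01800000) = 0x03800000
 *
 * so the RR_DECOMPRESSOR reservation below covers [0, 0x03800000): the
 * decompressor itself, its decompression heap and buffer, plus the window the
 * uncompressed kernel would occupy if KASLR is off or fails.
 */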
+ physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr); + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size) + physmem_reserve(RR_INITRD, parmarea.initrd_start, parmarea.initrd_size); + oldmem_data.start = parmarea.oldmem_base; + oldmem_data.size = parmarea.oldmem_size; + + read_ipl_report(); + sclp_early_read_info(); + sclp_early_detect_machine_features(); + detect_facilities(); + detect_diag9c(); + detect_machine_type(); + cmma_init(); + sanitize_prot_virt_host(); + max_physmem_end = detect_max_physmem_end(); + setup_ident_map_size(max_physmem_end); + setup_vmalloc_size(); + asce_limit = setup_kernel_memory_layout(kernel_size); + /* got final ident_map_size, physmem allocations could be performed now */ + physmem_set_usable_limit(ident_map_size); + detect_physmem_online_ranges(max_physmem_end); + save_ipl_cert_comp_list(); + rescue_initrd(safe_addr, ident_map_size); + + /* + * __kaslr_offset_phys must be _SEGMENT_SIZE aligned, so the lower + * 20 bits (the offset within a large page) are zero. Copy the last + * 20 bits of __kaslr_offset, which is THREAD_SIZE aligned, to + * __kaslr_offset_phys. + * + * With this the last 20 bits of __kaslr_offset_phys and __kaslr_offset + * are identical, which is required to allow for large mappings of the + * kernel image. + */ + kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK; + if (kaslr_enabled()) { + unsigned long size = vmlinux_size + kaslr_large_page_offset; + + text_lma = randomize_within_range(size, _SEGMENT_SIZE, TEXT_OFFSET, ident_map_size); } + if (!text_lma) + text_lma = nokaslr_text_lma; + text_lma |= kaslr_large_page_offset; - if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { - img = decompress_kernel(); - memmove((void *)vmlinux.default_lma, img, vmlinux.image_size); - } else if (__kaslr_offset) - memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size); + /* + * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region is + * never accessed via the kernel image mapping as per the linker script: + * + * . = TEXT_OFFSET; + * + * Therefore, this region could be used for something else and does + * not need to be reserved. See how it is skipped in setup_vmem(). + */ + __kaslr_offset_phys = text_lma - TEXT_OFFSET; + kaslr_adjust_vmlinux_info(__kaslr_offset_phys); + physmem_reserve(RR_VMLINUX, text_lma, vmlinux_size); + deploy_kernel((void *)text_lma); - clear_bss_section(); - copy_bootdata(); - if (IS_ENABLED(CONFIG_RELOCATABLE)) - handle_relocs(__kaslr_offset); - - if (__kaslr_offset) { - /* - * Save KASLR offset for early dumps, before vmcore_info is set. - * Mark as uneven to distinguish from real vmcore_info pointer. - */ - S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL; - /* Clear non-relocated kernel */ - if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) - memset(img, 0, vmlinux.image_size); + /* vmlinux decompression is done, shrink reserved low memory */ + physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);
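/*
 * Editorial example with hypothetical numbers, not part of the patch,
 * illustrating the large-page trick applied above (with _SEGMENT_SIZE == 1 MiB
 * and a THREAD_SIZE aligned __kaslr_offset):
 *
 *	__kaslr_offset                                 = 0x000003ffe0d4c000
 *	__kaslr_offset & ~_SEGMENT_MASK (low 20 bits)  = 0x4c000
 *	text_lma as randomized (_SEGMENT_SIZE aligned) = 0x0000000087600000
 *	text_lma |= 0x4c000                            = 0x000000008764c000
 *
 * Virtual and physical start addresses now agree in their low 20 bits, which
 * is what permits mapping the kernel image with 1 MiB large pages.
 */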
+ + /* + * In case KASLR is enabled the randomized location of the .amode31 + * section might overlap with the .vmlinux.relocs section. To avoid that, + * the below randomize_within_range() could have been called with + * __vmlinux_relocs_64_end as the lower range address. However, + * the .amode31 section is written to by the decompressed kernel - at + * that time the contents of .vmlinux.relocs are not needed anymore. + * Conversely, .vmlinux.relocs is read only by the decompressor, even + * before the kernel has started. Therefore, in case the two sections + * overlap there is no risk of corrupting any data. + */ + if (kaslr_enabled()) { + unsigned long amode31_min; + + amode31_min = (unsigned long)_decompressor_end; + amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G); } + if (!amode31_lma) + amode31_lma = text_lma - vmlinux.amode31_size; + physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size); + + /* + * The order of the following operations is important: + * + * - kaslr_adjust_relocs() must follow clear_bss_section() to establish + * static memory references to data in .bss to be used by setup_vmem() + * (i.e. init_mm.pgd) + * + * - setup_vmem() must follow kaslr_adjust_relocs() to be able to use + * static memory references to data in .bss (i.e. init_mm.pgd) + * + * - copy_bootdata() must follow setup_vmem() to propagate changes + * to bootdata made by setup_vmem() + */ + clear_bss_section(text_lma); + kaslr_adjust_relocs(text_lma, text_lma + vmlinux.image_size, + __kaslr_offset, __kaslr_offset_phys); + kaslr_adjust_got(__kaslr_offset); + setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit); + dump_physmem_reserved(); + copy_bootdata(); + __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions, + (struct alt_instr *)_vmlinux_info.alt_instructions_end, + ALT_CTX_EARLY); + + /* + * Save KASLR offset for early dumps, before vmcore_info is set. + * Mark as uneven to distinguish from real vmcore_info pointer. + */ + get_lowcore()->vmcore_info = __kaslr_offset_phys ? __kaslr_offset_phys | 0x1UL : 0; + + /* + * Jump to the decompressed kernel entry point and switch DAT mode on. + */ + psw.addr = __kaslr_offset + vmlinux.entry; + psw.mask = PSW_KERNEL_BITS; + boot_debug("Starting kernel at: 0x%016lx\n", psw.addr); + __load_psw(psw); } diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c index b11e8108773a..f6b9b1df48a8 100644 --- a/arch/s390/boot/string.c +++ b/arch/s390/boot/string.c @@ -1,10 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 +#define IN_BOOT_STRING_C 1 #include <linux/ctype.h> #include <linux/kernel.h> #include <linux/errno.h> #undef CONFIG_KASAN +#undef CONFIG_KASAN_GENERIC +#undef CONFIG_KMSAN #include "../lib/string.c" +/* + * Duplicate some functions from the common lib/string.c + * instead of fully including it. + */ + int strncmp(const char *cs, const char *ct, size_t count) { unsigned char c1, c2; @@ -21,6 +29,15 @@ int strncmp(const char *cs, const char *ct, size_t count) return 0; } +void *memset64(uint64_t *s, uint64_t v, size_t count) +{ + uint64_t *xs = s; + + while (count--) + *xs++ = v; + return s; +} + char *skip_spaces(const char *str) { while (isspace(*str)) diff --git a/arch/s390/boot/text_dma.S b/arch/s390/boot/text_dma.S deleted file mode 100644 index 9715715c4c28..000000000000 --- a/arch/s390/boot/text_dma.S +++ /dev/null @@ -1,184 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Code that needs to run below 2 GB. - * - * Copyright IBM Corp. 2019 - */ - -#include <linux/linkage.h> -#include <asm/errno.h> -#include <asm/sigp.h> - -#ifdef CC_USING_EXPOLINE - .pushsection .dma.text.__s390_indirect_jump_r14,"axG" -__dma__s390_indirect_jump_r14: - larl %r1,0f - ex 0,0(%r1) - j . -0: br %r14 - .popsection -#endif - - .section .dma.text,"ax" -/* - * Simplified version of expoline thunk. The normal thunks can not be used here, - * because they might be more than 2 GB away, and not reachable by the relative - * branch. No comdat, exrl, etc.
optimizations used here, because it only - * affects a few functions that are not performance-relevant. - */ - .macro BR_EX_DMA_r14 -#ifdef CC_USING_EXPOLINE - jg __dma__s390_indirect_jump_r14 -#else - br %r14 -#endif - .endm - -/* - * int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode) - */ -ENTRY(_diag14_dma) - lgr %r1,%r2 - lgr %r2,%r3 - lgr %r3,%r4 - lhi %r5,-EIO - sam31 - diag %r1,%r2,0x14 -.Ldiag14_ex: - ipm %r5 - srl %r5,28 -.Ldiag14_fault: - sam64 - lgfr %r2,%r5 - BR_EX_DMA_r14 - EX_TABLE_DMA(.Ldiag14_ex, .Ldiag14_fault) -ENDPROC(_diag14_dma) - -/* - * int _diag210_dma(struct diag210 *addr) - */ -ENTRY(_diag210_dma) - lgr %r1,%r2 - lhi %r2,-1 - sam31 - diag %r1,%r0,0x210 -.Ldiag210_ex: - ipm %r2 - srl %r2,28 -.Ldiag210_fault: - sam64 - lgfr %r2,%r2 - BR_EX_DMA_r14 - EX_TABLE_DMA(.Ldiag210_ex, .Ldiag210_fault) -ENDPROC(_diag210_dma) - -/* - * int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode) - */ -ENTRY(_diag26c_dma) - lghi %r5,-EOPNOTSUPP - sam31 - diag %r2,%r4,0x26c -.Ldiag26c_ex: - sam64 - lgfr %r2,%r5 - BR_EX_DMA_r14 - EX_TABLE_DMA(.Ldiag26c_ex, .Ldiag26c_ex) -ENDPROC(_diag26c_dma) - -/* - * void _diag0c_dma(struct hypfs_diag0c_entry *entry) - */ -ENTRY(_diag0c_dma) - sam31 - diag %r2,%r2,0x0c - sam64 - BR_EX_DMA_r14 -ENDPROC(_diag0c_dma) - -/* - * void _swsusp_reset_dma(void) - */ -ENTRY(_swsusp_reset_dma) - larl %r1,restart_entry - larl %r2,.Lrestart_diag308_psw - og %r1,0(%r2) - stg %r1,0(%r0) - lghi %r0,0 - diag %r0,%r0,0x308 -restart_entry: - lhi %r1,1 - sigp %r1,%r0,SIGP_SET_ARCHITECTURE - sam64 - BR_EX_DMA_r14 -ENDPROC(_swsusp_reset_dma) - -/* - * void _diag308_reset_dma(void) - * - * Calls diag 308 subcode 1 and continues execution - */ -ENTRY(_diag308_reset_dma) - larl %r4,.Lctlregs # Save control registers - stctg %c0,%c15,0(%r4) - lg %r2,0(%r4) # Disable lowcore protection - nilh %r2,0xefff - larl %r4,.Lctlreg0 - stg %r2,0(%r4) - lctlg %c0,%c0,0(%r4) - larl %r4,.Lfpctl # Floating point control register - stfpc 0(%r4) - larl %r4,.Lprefix # Save prefix register - stpx 0(%r4) - larl %r4,.Lprefix_zero # Set prefix register to 0 - spx 0(%r4) - larl %r4,.Lcontinue_psw # Save PSW flags - epsw %r2,%r3 - stm %r2,%r3,0(%r4) - larl %r4,restart_part2 # Setup restart PSW at absolute 0 - larl %r3,.Lrestart_diag308_psw - og %r4,0(%r3) # Save PSW - lghi %r3,0 - sturg %r4,%r3 # Use sturg, because of large pages - lghi %r1,1 - lghi %r0,0 - diag %r0,%r1,0x308 -restart_part2: - lhi %r0,0 # Load r0 with zero - lhi %r1,2 # Use mode 2 = ESAME (dump) - sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode - sam64 # Switch to 64 bit addressing mode - larl %r4,.Lctlregs # Restore control registers - lctlg %c0,%c15,0(%r4) - larl %r4,.Lfpctl # Restore floating point ctl register - lfpc 0(%r4) - larl %r4,.Lprefix # Restore prefix register - spx 0(%r4) - larl %r4,.Lcontinue_psw # Restore PSW flags - lpswe 0(%r4) -.Lcontinue: - BR_EX_DMA_r14 -ENDPROC(_diag308_reset_dma) - - .section .dma.data,"aw",@progbits -.align 8 -.Lrestart_diag308_psw: - .long 0x00080000,0x80000000 - -.align 8 -.Lcontinue_psw: - .quad 0,.Lcontinue - -.align 8 -.Lctlreg0: - .quad 0 -.Lctlregs: - .rept 16 - .quad 0 - .endr -.Lfpctl: - .long 0 -.Lprefix: - .long 0 -.Lprefix_zero: - .long 0 diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index ed007f4a6444..4568e8f81dac 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -1,9 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 #include <asm/uv.h> +#include <asm/boot_data.h> #include <asm/facility.h> #include <asm/sections.h> 
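/*
 * Editorial note, not part of the patch: inst_calls_list below is probed with
 * test_bit_inv() because s390 numbers bits most-significant-bit first. Its
 * semantics (per arch/s390/include/asm/bitops.h, assuming BITS_PER_LONG == 64)
 * boil down to:
 *
 *	test_bit_inv(nr, ptr) == test_bit(nr ^ 63, ptr)
 *
 * so UV call number 0 corresponds to the most significant bit of the first
 * word of the instruction-call mask.
 */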
+#include "boot.h" +#include "uv.h" + +/* will be used in arch/s390/kernel/uv.c */ int __bootdata_preserved(prot_virt_guest); +int __bootdata_preserved(prot_virt_host); +struct uv_info __bootdata_preserved(uv_info); void uv_query_info(void) { @@ -15,10 +22,67 @@ void uv_query_info(void) if (!test_facility(158)) return; - if (uv_call(0, (uint64_t)&uvcb)) + /* Ignore that there might be more data we do not process */ + if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != UVC_RC_MORE_DATA) return; + if (IS_ENABLED(CONFIG_KVM)) { + memcpy(uv_info.inst_calls_list, uvcb.inst_calls_list, sizeof(uv_info.inst_calls_list)); + uv_info.uv_base_stor_len = uvcb.uv_base_stor_len; + uv_info.guest_base_stor_len = uvcb.conf_base_phys_stor_len; + uv_info.guest_virt_base_stor_len = uvcb.conf_base_virt_stor_len; + uv_info.guest_virt_var_stor_len = uvcb.conf_virt_var_stor_len; + uv_info.guest_cpu_stor_len = uvcb.cpu_stor_len; + uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE); + uv_info.max_num_sec_conf = uvcb.max_num_sec_conf; + uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id; + uv_info.uv_feature_indications = uvcb.uv_feature_indications; + uv_info.supp_se_hdr_ver = uvcb.supp_se_hdr_versions; + uv_info.supp_se_hdr_pcf = uvcb.supp_se_hdr_pcf; + uv_info.conf_dump_storage_state_len = uvcb.conf_dump_storage_state_len; + uv_info.conf_dump_finalize_len = uvcb.conf_dump_finalize_len; + uv_info.supp_att_req_hdr_ver = uvcb.supp_att_req_hdr_ver; + uv_info.supp_att_pflags = uvcb.supp_att_pflags; + uv_info.supp_add_secret_req_ver = uvcb.supp_add_secret_req_ver; + uv_info.supp_add_secret_pcf = uvcb.supp_add_secret_pcf; + uv_info.supp_secret_types = uvcb.supp_secret_types; + uv_info.max_assoc_secrets = uvcb.max_assoc_secrets; + uv_info.max_retr_secrets = uvcb.max_retr_secrets; + } + if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) && test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list)) prot_virt_guest = 1; } + +unsigned long adjust_to_uv_max(unsigned long limit) +{ + if (is_prot_virt_host() && uv_info.max_sec_stor_addr) + limit = min_t(unsigned long, limit, uv_info.max_sec_stor_addr); + return limit; +} + +static int is_prot_virt_host_capable(void) +{ + /* disable if no prot_virt=1 given on command-line */ + if (!is_prot_virt_host()) + return 0; + /* disable if protected guest virtualization is enabled */ + if (is_prot_virt_guest()) + return 0; + /* disable if no hardware support */ + if (!test_facility(158)) + return 0; + /* disable if kdump */ + if (oldmem_data.start) + return 0; + /* disable if stand-alone dump */ + if (ipl_block_valid && is_ipl_block_dump()) + return 0; + return 1; +} + +void sanitize_prot_virt_host(void) +{ + prot_virt_host = is_prot_virt_host_capable(); +} diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h new file mode 100644 index 000000000000..da4a4a8d48e0 --- /dev/null +++ b/arch/s390/boot/uv.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_UV_H +#define BOOT_UV_H + +unsigned long adjust_to_uv_max(unsigned long limit); +void sanitize_prot_virt_host(void); +void uv_query_info(void); + +#endif /* BOOT_UV_H */ diff --git a/arch/s390/boot/version.c b/arch/s390/boot/version.c index d32e58bdda6a..fd32f038777f 100644 --- a/arch/s390/boot/version.c +++ b/arch/s390/boot/version.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <generated/utsversion.h> #include <generated/utsrelease.h> #include <generated/compile.h> #include "boot.h" diff --git a/arch/s390/boot/vmem.c 
b/arch/s390/boot/vmem.c new file mode 100644 index 000000000000..1d073acd05a7 --- /dev/null +++ b/arch/s390/boot/vmem.c @@ -0,0 +1,566 @@ +// SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "vmem: " fmt +#include <linux/cpufeature.h> +#include <linux/sched/task.h> +#include <linux/pgtable.h> +#include <linux/kasan.h> +#include <asm/page-states.h> +#include <asm/pgalloc.h> +#include <asm/facility.h> +#include <asm/sections.h> +#include <asm/ctlreg.h> +#include <asm/physmem_info.h> +#include <asm/maccess.h> +#include <asm/machine.h> +#include <asm/abs_lowcore.h> +#include "decompressor.h" +#include "boot.h" + +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) +struct ctlreg __bootdata_preserved(s390_invalid_asce); + +#ifdef CONFIG_PROC_FS +atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); +#endif + +#define init_mm (*(struct mm_struct *)vmlinux.init_mm_off) +#define swapper_pg_dir vmlinux.swapper_pg_dir_off +#define invalid_pg_dir vmlinux.invalid_pg_dir_off + +enum populate_mode { + POPULATE_NONE, + POPULATE_DIRECT, + POPULATE_LOWCORE, + POPULATE_ABS_LOWCORE, + POPULATE_IDENTITY, + POPULATE_KERNEL, +#ifdef CONFIG_KASAN + /* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */ + POPULATE_KASAN_MAP_SHADOW, + POPULATE_KASAN_ZERO_SHADOW, + POPULATE_KASAN_SHALLOW +#endif +}; + +#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t +static inline const char *get_populate_mode_name(enum populate_mode t) +{ + switch (t) { + POPULATE_MODE_NAME(NONE); + POPULATE_MODE_NAME(DIRECT); + POPULATE_MODE_NAME(LOWCORE); + POPULATE_MODE_NAME(ABS_LOWCORE); + POPULATE_MODE_NAME(IDENTITY); + POPULATE_MODE_NAME(KERNEL); +#ifdef CONFIG_KASAN + POPULATE_MODE_NAME(KASAN_MAP_SHADOW); + POPULATE_MODE_NAME(KASAN_ZERO_SHADOW); + POPULATE_MODE_NAME(KASAN_SHALLOW); +#endif + default: + return "UNKNOWN"; + } +} + +static bool is_kasan_populate_mode(enum populate_mode mode) +{ +#ifdef CONFIG_KASAN + return mode >= POPULATE_KASAN_MAP_SHADOW; +#else + return false; +#endif +} + +static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode); + +#ifdef CONFIG_KASAN + +#define kasan_early_shadow_page vmlinux.kasan_early_shadow_page_off +#define kasan_early_shadow_pte ((pte_t *)vmlinux.kasan_early_shadow_pte_off) +#define kasan_early_shadow_pmd ((pmd_t *)vmlinux.kasan_early_shadow_pmd_off) +#define kasan_early_shadow_pud ((pud_t *)vmlinux.kasan_early_shadow_pud_off) +#define kasan_early_shadow_p4d ((p4d_t *)vmlinux.kasan_early_shadow_p4d_off) +#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) + +static pte_t pte_z; + +static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode) +{ + unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start)); + unsigned long sha_end = PAGE_ALIGN(__sha(end)); + + boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode), + start, end, sha_start, sha_end); + pgtable_populate(sha_start, sha_end, mode); +} + +static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end) +{ + pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); + pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); + p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); + unsigned long memgap_start = 0; + unsigned long start, end; + int i; + + pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); + crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); + 
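/*
 * Editorial note, not part of the patch: with generic KASAN one shadow byte
 * tracks 8 bytes of address space (KASAN_SHADOW_SCALE_SHIFT == 3), so the
 * __sha() helper used by kasan_populate() effectively computes
 *
 *	shadow = (addr >> 3) + KASAN_SHADOW_OFFSET;
 *
 * A mapped range thus needs only 1/8 of its size in shadow (8 MiB of kernel
 * text -> 1 MiB of shadow), and all untracked ranges can be backed by the
 * single shared zero shadow page set up here.
 */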
crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); + crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); + memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); + __arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat(kasan_early_shadow_pte, 1); + + for_each_physmem_usable_range(i, &start, &end) { + kasan_populate((unsigned long)__identity_va(start), + (unsigned long)__identity_va(end), + POPULATE_KASAN_MAP_SHADOW); + if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) { + kasan_populate((unsigned long)__identity_va(memgap_start), + (unsigned long)__identity_va(start), + POPULATE_KASAN_ZERO_SHADOW); + } + memgap_start = end; + } + kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW); + kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW); + kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW); + /* shallowly populate kasan shadow for vmalloc and modules */ + kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); + /* populate kasan shadow for untracked memory */ + kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START, + POPULATE_KASAN_ZERO_SHADOW); + kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW); +} + +static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) { + pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d); + return true; + } + return false; +} + +static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) { + p4d_populate(&init_mm, p4d, kasan_early_shadow_pud); + return true; + } + return false; +} + +static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) { + pud_populate(&init_mm, pud, kasan_early_shadow_pmd); + return true; + } + return false; +} + +static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) { + pmd_populate(&init_mm, pmd, kasan_early_shadow_pte); + return true; + } + return false; +} + +static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW) { + set_pte(pte, pte_z); + return true; + } + return false; +} +#else + +static inline void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end) +{ +} + +static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return 
false; +} + +static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) +{ + return false; +} + +#endif + +/* + * Mimic virt_to_kpte() in the absence of the init_mm symbol. Skip the pmd NULL check though. + */ +static inline pte_t *__virt_to_kpte(unsigned long va) +{ + return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va); +} + +static void *boot_crst_alloc(unsigned long val) +{ + unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; + unsigned long *table; + + table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size); + crst_table_init(table, val); + __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); + return table; +} + +static pte_t *boot_pte_alloc(void) +{ + static void *pte_leftover; + pte_t *pte; + + /* + * Handling pte_leftovers this way helps to avoid memory fragmentation + * during POPULATE_KASAN_MAP_SHADOW when EDAT is off + */ + if (!pte_leftover) { + pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE); + pte = pte_leftover + _PAGE_TABLE_SIZE; + __arch_set_page_dat(pte, 1); + } else { + pte = pte_leftover; + pte_leftover = NULL; + } + + memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); + return pte; +} + +static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size, + enum populate_mode mode) +{ + switch (mode) { + case POPULATE_NONE: + return INVALID_PHYS_ADDR; + case POPULATE_DIRECT: + return addr; + case POPULATE_LOWCORE: + return __lowcore_pa(addr); + case POPULATE_ABS_LOWCORE: + return __abs_lowcore_pa(addr); + case POPULATE_KERNEL: + return __kernel_pa(addr); + case POPULATE_IDENTITY: + return __identity_pa(addr); +#ifdef CONFIG_KASAN + case POPULATE_KASAN_MAP_SHADOW: + /* Allow large page allocations to fail; this falls back to 1 MB/4 KB pages */ + addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE); + if (addr) { + memset((void *)addr, 0, size); + return addr; + } + return INVALID_PHYS_ADDR; +#endif + default: + return INVALID_PHYS_ADDR; + } +} + +static bool large_page_mapping_allowed(enum populate_mode mode) +{ + switch (mode) { + case POPULATE_DIRECT: + case POPULATE_IDENTITY: + case POPULATE_KERNEL: +#ifdef CONFIG_KASAN + case POPULATE_KASAN_MAP_SHADOW: +#endif + return true; + default: + return false; + } +} + +static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end, + enum populate_mode mode) +{ + unsigned long pa, size = end - addr; + + if (!cpu_has_edat2() || !large_page_mapping_allowed(mode) || + !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE)) + return INVALID_PHYS_ADDR; + + pa = resolve_pa_may_alloc(addr, size, mode); + if (!IS_ALIGNED(pa, PUD_SIZE)) + return INVALID_PHYS_ADDR; + + return pa; +} + +static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end, + enum populate_mode mode) +{ + unsigned long pa, size = end - addr; + + if (!cpu_has_edat1() || !large_page_mapping_allowed(mode) || + !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE)) + return INVALID_PHYS_ADDR; + + pa = resolve_pa_may_alloc(addr, size, mode); + if (!IS_ALIGNED(pa, PMD_SIZE)) + return INVALID_PHYS_ADDR; + + return pa; +}
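An editorial sketch, not part of the patch: try_get_large_pmd_pa() above boils down to one predicate. A 1 MiB segment mapping is used only if the EDAT1 facility is present, the populate mode permits large mappings, and the virtual address, physical address and remaining length all cover a whole, aligned segment. has_edat1 and mode_ok below are illustrative stand-ins for cpu_has_edat1() and large_page_mapping_allowed():

#include <stdbool.h>

#define SZ_1M	(1UL << 20)

static bool can_use_1m_mapping(unsigned long va, unsigned long pa,
			       unsigned long bytes_left, bool has_edat1, bool mode_ok)
{
	return has_edat1 && mode_ok &&
	       (va & (SZ_1M - 1)) == 0 &&	/* virtual start segment aligned */
	       (pa & (SZ_1M - 1)) == 0 &&	/* physical start segment aligned */
	       bytes_left >= SZ_1M;		/* range covers the full segment */
}

(try_get_large_pud_pa() is the same check at 2 GB region scale, gated by EDAT2.)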
+static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, + enum populate_mode mode) +{ + unsigned long pages = 0; + pte_t *pte, entry; + + pte = pte_offset_kernel(pmd, addr); + for (; addr < end; addr += PAGE_SIZE, pte++) { + if (pte_none(*pte)) { + if (kasan_pte_populate_zero_shadow(pte, mode)) + continue; + entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode)); + entry = set_pte_bit(entry, PAGE_KERNEL); + set_pte(pte, entry); + pages++; + } + } + if (mode == POPULATE_IDENTITY) + update_page_count(PG_DIRECT_MAP_4K, pages); +} + +static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end, + enum populate_mode mode) +{ + unsigned long pa, next, pages = 0; + pmd_t *pmd, entry; + pte_t *pte; + + pmd = pmd_offset(pud, addr); + for (; addr < end; addr = next, pmd++) { + next = pmd_addr_end(addr, end); + if (pmd_none(*pmd)) { + if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode)) + continue; + pa = try_get_large_pmd_pa(pmd, addr, next, mode); + if (pa != INVALID_PHYS_ADDR) { + entry = __pmd(pa); + entry = set_pmd_bit(entry, SEGMENT_KERNEL); + set_pmd(pmd, entry); + pages++; + continue; + } + pte = boot_pte_alloc(); + pmd_populate(&init_mm, pmd, pte); + } else if (pmd_leaf(*pmd)) { + continue; + } + pgtable_pte_populate(pmd, addr, next, mode); + } + if (mode == POPULATE_IDENTITY) + update_page_count(PG_DIRECT_MAP_1M, pages); +} + +static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end, + enum populate_mode mode) +{ + unsigned long pa, next, pages = 0; + pud_t *pud, entry; + pmd_t *pmd; + + pud = pud_offset(p4d, addr); + for (; addr < end; addr = next, pud++) { + next = pud_addr_end(addr, end); + if (pud_none(*pud)) { + if (kasan_pud_populate_zero_shadow(pud, addr, next, mode)) + continue; + pa = try_get_large_pud_pa(pud, addr, next, mode); + if (pa != INVALID_PHYS_ADDR) { + entry = __pud(pa); + entry = set_pud_bit(entry, REGION3_KERNEL); + set_pud(pud, entry); + pages++; + continue; + } + pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY); + pud_populate(&init_mm, pud, pmd); + } else if (pud_leaf(*pud)) { + continue; + } + pgtable_pmd_populate(pud, addr, next, mode); + } + if (mode == POPULATE_IDENTITY) + update_page_count(PG_DIRECT_MAP_2G, pages); +} + +static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end, + enum populate_mode mode) +{ + unsigned long next; + p4d_t *p4d; + pud_t *pud; + + p4d = p4d_offset(pgd, addr); + for (; addr < end; addr = next, p4d++) { + next = p4d_addr_end(addr, end); + if (p4d_none(*p4d)) { + if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode)) + continue; + pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY); + p4d_populate(&init_mm, p4d, pud); + } + pgtable_pud_populate(p4d, addr, next, mode); + } +} + +static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode) +{ + unsigned long next; + pgd_t *pgd; + p4d_t *p4d; + + if (!is_kasan_populate_mode(mode)) { + boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n", + get_populate_mode_name(mode), addr, end, + resolve_pa_may_alloc(addr, 0, mode), + resolve_pa_may_alloc(end - 1, 0, mode) + 1); + } + + pgd = pgd_offset(&init_mm, addr); + for (; addr < end; addr = next, pgd++) { + next = pgd_addr_end(addr, end); + if (pgd_none(*pgd)) { + if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode)) + continue; + p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY); + pgd_populate(&init_mm, pgd, p4d); + } +#ifdef CONFIG_KASAN + if (mode == POPULATE_KASAN_SHALLOW) + continue; +#endif + pgtable_p4d_populate(pgd, addr, next, mode); + } +}
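An editorial sketch, not part of the patch: every pgtable_*_populate() level above follows one pattern - clamp the range to the reach of the current entry, install a leaf entry when a large mapping is possible, otherwise descend one level. Reduced to a self-contained, runnable model with only two mapping sizes (the helper names and printf() stand in for real page-table updates):

#include <stdio.h>
#include <stdbool.h>

#define SEG_SIZE	(1UL << 20)	/* 1 MiB, the PMD leaf size */
#define PAGE_SZ		(1UL << 12)

static bool can_map_segment(unsigned long va, unsigned long end)
{
	return (va % SEG_SIZE) == 0 && end - va >= SEG_SIZE;
}

static void populate(unsigned long va, unsigned long end)
{
	while (va < end) {
		if (can_map_segment(va, end)) {
			printf("1M leaf at %#lx\n", va);	/* large mapping */
			va += SEG_SIZE;
		} else {
			printf("4K page at %#lx\n", va);	/* descend to PTEs */
			va += PAGE_SZ;
		}
	}
}

int main(void)
{
	populate(0x0ff000, 0x301000);	/* unaligned head, two segments, 4K tail */
	return 0;
}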
+void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit) +{ + unsigned long lowcore_address = 0; + unsigned long start, end; + unsigned long asce_type; + unsigned long asce_bits; + pgd_t *init_mm_pgd; + int i; + + /* + * Mark whole memory as no-dat. This must be done before any + * page tables are allocated, or kernel image builtin pages + * would be marked as DAT tables. + */ + for_each_physmem_online_range(i, &start, &end) + __arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT); + + /* + * init_mm->pgd contains virtual address of swapper_pg_dir. + * It is unusable at this stage while DAT is still off. Swap + * it for physical address of swapper_pg_dir and restore + * the virtual address after all page tables are created. + */ + init_mm_pgd = init_mm.pgd; + init_mm.pgd = (pgd_t *)swapper_pg_dir; + + if (asce_limit == _REGION1_SIZE) { + asce_type = _REGION2_ENTRY_EMPTY; + asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; + } else { + asce_type = _REGION3_ENTRY_EMPTY; + asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + } + s390_invalid_asce.val = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + + crst_table_init((unsigned long *)swapper_pg_dir, asce_type); + crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY); + __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER); + + if (machine_has_relocated_lowcore()) + lowcore_address = LOWCORE_ALT_ADDRESS; + + /* + * To allow prefixing, the lowcore must be mapped with 4KB pages. + * To prevent creation of a large page at address 0, first map + * the lowcore and create the identity mapping only afterwards. + */ + pgtable_populate(lowcore_address, + lowcore_address + sizeof(struct lowcore), + POPULATE_LOWCORE); + for_each_physmem_usable_range(i, &start, &end) { + pgtable_populate((unsigned long)__identity_va(start), + (unsigned long)__identity_va(end), + POPULATE_IDENTITY); + } + + /* + * [kernel_start..kernel_start + TEXT_OFFSET] region is never + * accessed as per the linker script: + * + * . = TEXT_OFFSET; + * + * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to + * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region. + */ + pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL); + pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT); + pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore), + POPULATE_ABS_LOWCORE); + pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE, + POPULATE_NONE); + memcpy_real_ptep = __identity_va(__virt_to_kpte(__memcpy_real_area)); + + kasan_populate_shadow(kernel_start, kernel_end); + + get_lowcore()->kernel_asce.val = swapper_pg_dir | asce_bits; + get_lowcore()->user_asce = s390_invalid_asce; + + local_ctl_load(1, &get_lowcore()->kernel_asce); + local_ctl_load(7, &get_lowcore()->user_asce); + local_ctl_load(13, &get_lowcore()->kernel_asce); + + init_mm.context.asce = get_lowcore()->kernel_asce.val; + init_mm.pgd = init_mm_pgd; +}
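An editorial sketch, not part of the patch: the ASCE values built by setup_vmem() combine a table origin in the high bits with designation-type and table-length controls in the low 12 bits (_ASCE_TYPE_REGION2/3 and _ASCE_TABLE_LENGTH). The helper and mask below are illustrative:

#include <stdint.h>

#define ASCE_CONTROL_MASK	0xfffUL	/* low 12 bits: type, length, controls */

static uint64_t make_asce(uint64_t table_origin, uint64_t type_and_length)
{
	/* mirrors "swapper_pg_dir | asce_bits"; the origin must be table aligned */
	return (table_origin & ~ASCE_CONTROL_MASK) | (type_and_length & ASCE_CONTROL_MASK);
}

setup_vmem() loads the resulting kernel ASCE into control registers 1 (primary) and 13 (home), and the invalid ASCE into control register 7 (secondary), matching the three local_ctl_load() calls above.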
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S new file mode 100644 index 000000000000..50988022f9ea --- /dev/null +++ b/arch/s390/boot/vmlinux.lds.S @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <asm-generic/vmlinux.lds.h> +#include <asm/vmlinux.lds.h> +#include <asm/thread_info.h> +#include <asm/page.h> +#include <asm/sclp.h> +#include "boot.h" + +OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") +OUTPUT_ARCH(s390:64-bit) + +ENTRY(startup) + +SECTIONS +{ + . = 0; + .ipldata : { + *(.ipldata) + } + . = IPL_START; + .head.text : { + _head = . ; + HEAD_TEXT + _ehead = . ; + } + . = PARMAREA; + .parmarea : { + *(.parmarea) + } + .text : { + _text = .; /* Text */ + *(.text) + *(.text.*) + INIT_TEXT + _etext = . ; + } + .rodata : { + _rodata = . ; + *(.rodata) /* read-only data */ + *(.rodata.*) + _erodata = . ; + } + EXCEPTION_TABLE(16) + .got : { + *(.got) + } + NOTES + .data : { + _data = . ; + *(.data) + *(.data.*) + _edata = . ; + } + + BOOT_DATA + BOOT_DATA_PRESERVED + + /* + * This is the BSS section of the decompressor and not of the decompressed Linux kernel. + * It will consume space in the decompressor's image. + */ + . = ALIGN(8); + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) + /* + * Stacks for the decompressor + */ + . = ALIGN(PAGE_SIZE); + _dump_info_stack_start = .; + . += PAGE_SIZE; + _dump_info_stack_end = .; + . = ALIGN(PAGE_SIZE); + _stack_start = .; + . += BOOT_STACK_SIZE; + _stack_end = .; + _ebss = .; + } + + /* + * Uncompressed image info used by the decompressor; it should match + * struct vmlinux_info. It comes from the .vmlinux.info section of the + * uncompressed vmlinux in the form of info.o + */ + . = ALIGN(8); + .vmlinux.info : { + _vmlinux_info = .; + *(.vmlinux.info) + } + + .decompressor.syms : { + . += 1; /* make sure we have \0 before the first entry */ + . = ALIGN(2); + _decompressor_syms_start = .; + *(.decompressor.syms) + _decompressor_syms_end = .; + } + + _decompressor_end = .; + + . = ALIGN(4); + .vmlinux.relocs : { + __vmlinux_relocs_64_start = .; + *(.vmlinux.relocs_64) + __vmlinux_relocs_64_end = .; + } + +#ifdef CONFIG_KERNEL_UNCOMPRESSED + . = ALIGN(PAGE_SIZE); + . += AMODE31_SIZE; /* .amode31 section */ + + /* + * Make sure the location counter is not less than TEXT_OFFSET. + * _SEGMENT_SIZE is not available, use ALIGN(1 << 20) instead. + */ + . = MAX(TEXT_OFFSET, ALIGN(1 << 20)); +#else + . = ALIGN(8); +#endif + .rodata.compressed : { + _compressed_start = .; + *(.vmlinux.bin.compressed) + _compressed_end = .; + } + +#define SB_TRAILER_SIZE 32 + /* Trailer needed for Secure Boot */ + . += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */ + . = ALIGN(4096) - SB_TRAILER_SIZE; + .sb.trailer : { + QUAD(0) + QUAD(0) + QUAD(0) + QUAD(0x000000207a49504c) + } + _end = .; + + DWARF_DEBUG + ELF_DETAILS + + /* + * Make sure that the .got.plt is either completely empty or it + * contains only the three reserved double words. + */ + .got.plt : { + *(.got.plt) + } + ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!") + + /* + * Sections that should stay zero sized, which is safer to + * explicitly check instead of blindly discarding. + */ + .plt : { + *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt) + } + ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!") + .rela.dyn : { + *(.rela.*) *(.rela_*) + } + ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!") + + /* Sections to be discarded */ + /DISCARD/ : { + COMMON_DISCARDS + *(.eh_frame) + *(*__ksymtab*) + *(___kcrctab*) + } +}
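A closing editorial note: the QUAD(0x000000207a49504c) emitted into .sb.trailer reads, byte for byte, as "\0\0\0 zIPL", i.e. the ASCII string " zIPL" padded with NUL bytes. A stand-alone check:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t magic = 0x000000207a49504cULL;
	char buf[9];
	int i;

	/* emit the quadword most significant byte first, as it sits in the image */
	for (i = 0; i < 8; i++)
		buf[i] = (char)(magic >> (56 - 8 * i));
	buf[8] = '\0';
	printf("'%s'\n", buf + 3);	/* skip the three NUL bytes -> ' zIPL' */
	return 0;
}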