summaryrefslogtreecommitdiff
path: root/arch/s390/boot
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/boot')
-rw-r--r--arch/s390/boot/Makefile2
-rw-r--r--arch/s390/boot/boot.h42
-rwxr-xr-xarch/s390/boot/install.sh8
-rw-r--r--arch/s390/boot/ipl_parm.c6
-rw-r--r--arch/s390/boot/ipl_report.c100
-rw-r--r--arch/s390/boot/kaslr.c171
-rw-r--r--arch/s390/boot/mem_detect.c191
-rw-r--r--arch/s390/boot/pgm_check_info.c7
-rw-r--r--arch/s390/boot/physmem_info.c328
-rw-r--r--arch/s390/boot/startup.c129
-rw-r--r--arch/s390/boot/vmem.c284
-rw-r--r--arch/s390/boot/vmlinux.lds.S2
12 files changed, 805 insertions, 465 deletions
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index cebd4ca16916..c7c81e5f9218 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -35,7 +35,7 @@ endif
CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
-obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o vmem.o
+obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 58ce701d6110..222c6886acf6 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -8,6 +8,8 @@
#ifndef __ASSEMBLY__
+#include <asm/physmem_info.h>
+
struct machine_info {
unsigned char has_edat1 : 1;
unsigned char has_edat2 : 1;
@@ -30,24 +32,46 @@ struct vmlinux_info {
unsigned long init_mm_off;
unsigned long swapper_pg_dir_off;
unsigned long invalid_pg_dir_off;
+#ifdef CONFIG_KASAN
+ unsigned long kasan_early_shadow_page_off;
+ unsigned long kasan_early_shadow_pte_off;
+ unsigned long kasan_early_shadow_pmd_off;
+ unsigned long kasan_early_shadow_pud_off;
+ unsigned long kasan_early_shadow_p4d_off;
+#endif
};
void startup_kernel(void);
-unsigned long detect_memory(unsigned long *safe_addr);
-void mem_detect_set_usable_limit(unsigned long limit);
+unsigned long detect_max_physmem_end(void);
+void detect_physmem_online_ranges(unsigned long max_physmem_end);
+void physmem_set_usable_limit(unsigned long limit);
+void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size);
+void physmem_free(enum reserved_range_type type);
+/* for continuous/multiple allocations per type */
+unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
+ unsigned long align);
+/* for single allocations, 1 per type */
+unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
+ unsigned long align, unsigned long min, unsigned long max,
+ bool die_on_oom);
+unsigned long get_physmem_alloc_pos(void);
+bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
+ unsigned long *intersection_start);
bool is_ipl_block_dump(void);
void store_ipl_parmblock(void);
-unsigned long read_ipl_report(unsigned long safe_addr);
+int read_ipl_report(void);
+void save_ipl_cert_comp_list(void);
void setup_boot_command_line(void);
void parse_boot_command_line(void);
void verify_facilities(void);
void print_missing_facilities(void);
void sclp_early_setup_buffer(void);
void print_pgm_check_info(void);
-unsigned long get_random_base(unsigned long safe_addr);
+unsigned long randomize_within_range(unsigned long size, unsigned long align,
+ unsigned long min, unsigned long max);
void setup_vmem(unsigned long asce_limit);
-unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total);
void __printf(1, 2) decompressor_printk(const char *fmt, ...);
+void print_stacktrace(unsigned long sp);
void error(char *m);
extern struct machine_info machine;
@@ -57,12 +81,11 @@ extern const char kernel_version[];
extern unsigned long memory_limit;
extern unsigned long vmalloc_size;
extern int vmalloc_size_set;
-extern int kaslr_enabled;
extern char __boot_data_start[], __boot_data_end[];
extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
extern char _decompressor_syms_start[], _decompressor_syms_end[];
extern char _stack_start[], _stack_end[];
-extern char _end[];
+extern char _end[], _decompressor_end[];
extern unsigned char _compressed_start[];
extern unsigned char _compressed_end[];
extern struct vmlinux_info _vmlinux_info;
@@ -70,5 +93,10 @@ extern struct vmlinux_info _vmlinux_info;
#define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))
+static inline bool intersects(unsigned long addr0, unsigned long size0,
+ unsigned long addr1, unsigned long size1)
+{
+ return addr0 + size0 > addr1 && addr1 + size1 > addr0;
+}
#endif /* __ASSEMBLY__ */
#endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index 616ba1660f08..a13dd2f2aa1c 100755
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -17,8 +17,8 @@
echo "Warning: '${INSTALLKERNEL}' command not available - additional " \
"bootloader config required" >&2
-if [ -f $4/vmlinuz-$1 ]; then mv $4/vmlinuz-$1 $4/vmlinuz-$1.old; fi
-if [ -f $4/System.map-$1 ]; then mv $4/System.map-$1 $4/System.map-$1.old; fi
+if [ -f "$4/vmlinuz-$1" ]; then mv -- "$4/vmlinuz-$1" "$4/vmlinuz-$1.old"; fi
+if [ -f "$4/System.map-$1" ]; then mv -- "$4/System.map-$1" "$4/System.map-$1.old"; fi
-cat $2 > $4/vmlinuz-$1
-cp $3 $4/System.map-$1
+cat -- "$2" > "$4/vmlinuz-$1"
+cp -- "$3" "$4/System.map-$1"
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index c1f8f7999fed..8753cb0339e5 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -24,11 +24,11 @@ int __bootdata(noexec_disabled);
unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
struct ipl_parameter_block __bootdata_preserved(ipl_block);
int __bootdata_preserved(ipl_block_valid);
+int __bootdata_preserved(__kaslr_enabled);
unsigned long vmalloc_size = VMALLOC_DEFAULT_SIZE;
unsigned long memory_limit;
int vmalloc_size_set;
-int kaslr_enabled;
static inline int __diag308(unsigned long subcode, void *addr)
{
@@ -264,7 +264,7 @@ void parse_boot_command_line(void)
char *args;
int rc;
- kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE);
+ __kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE);
args = strcpy(command_line_buf, early_command_line);
while (*args) {
args = next_arg(args, &param, &val);
@@ -300,7 +300,7 @@ void parse_boot_command_line(void)
modify_fac_list(val);
if (!strcmp(param, "nokaslr"))
- kaslr_enabled = 0;
+ __kaslr_enabled = 0;
#if IS_ENABLED(CONFIG_KVM)
if (!strcmp(param, "prot_virt")) {
diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
index 9b14045065b6..1803035e68d2 100644
--- a/arch/s390/boot/ipl_report.c
+++ b/arch/s390/boot/ipl_report.c
@@ -5,6 +5,7 @@
#include <asm/sclp.h>
#include <asm/sections.h>
#include <asm/boot_data.h>
+#include <asm/physmem_info.h>
#include <uapi/asm/ipl.h>
#include "boot.h"
@@ -16,20 +17,16 @@ unsigned long __bootdata_preserved(ipl_cert_list_size);
unsigned long __bootdata(early_ipl_comp_list_addr);
unsigned long __bootdata(early_ipl_comp_list_size);
+static struct ipl_rb_certificates *certs;
+static struct ipl_rb_components *comps;
+static bool ipl_report_needs_saving;
+
#define for_each_rb_entry(entry, rb) \
for (entry = rb->entries; \
(void *) entry + sizeof(*entry) <= (void *) rb + rb->len; \
entry++)
-static inline bool intersects(unsigned long addr0, unsigned long size0,
- unsigned long addr1, unsigned long size1)
-{
- return addr0 + size0 > addr1 && addr1 + size1 > addr0;
-}
-
-static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
- struct ipl_rb_certificates *certs,
- unsigned long safe_addr)
+static unsigned long get_cert_comp_list_size(void)
{
struct ipl_rb_certificate_entry *cert;
struct ipl_rb_component_entry *comp;
@@ -44,36 +41,27 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
ipl_cert_list_size = 0;
for_each_rb_entry(cert, certs)
ipl_cert_list_size += sizeof(unsigned int) + cert->len;
- size = ipl_cert_list_size + early_ipl_comp_list_size;
+ return ipl_cert_list_size + early_ipl_comp_list_size;
+}
- /*
- * Start from safe_addr to find a free memory area large
- * enough for the IPL report boot data. This area is used
- * for ipl_cert_list_addr/ipl_cert_list_size and
- * early_ipl_comp_list_addr/early_ipl_comp_list_size. It must
- * not overlap with any component or any certificate.
- */
-repeat:
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size &&
- intersects(initrd_data.start, initrd_data.size, safe_addr, size))
- safe_addr = initrd_data.start + initrd_data.size;
- for_each_rb_entry(comp, comps)
- if (intersects(safe_addr, size, comp->addr, comp->len)) {
- safe_addr = comp->addr + comp->len;
- goto repeat;
- }
- for_each_rb_entry(cert, certs)
- if (intersects(safe_addr, size, cert->addr, cert->len)) {
- safe_addr = cert->addr + cert->len;
- goto repeat;
- }
- early_ipl_comp_list_addr = safe_addr;
- ipl_cert_list_addr = safe_addr + early_ipl_comp_list_size;
+bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
+ unsigned long *intersection_start)
+{
+ struct ipl_rb_certificate_entry *cert;
- return safe_addr + size;
+ if (!ipl_report_needs_saving)
+ return false;
+
+ for_each_rb_entry(cert, certs) {
+ if (intersects(addr, size, cert->addr, cert->len)) {
+ *intersection_start = cert->addr;
+ return true;
+ }
+ }
+ return false;
}
-static void copy_components_bootdata(struct ipl_rb_components *comps)
+static void copy_components_bootdata(void)
{
struct ipl_rb_component_entry *comp, *ptr;
@@ -82,7 +70,7 @@ static void copy_components_bootdata(struct ipl_rb_components *comps)
memcpy(ptr++, comp, sizeof(*ptr));
}
-static void copy_certificates_bootdata(struct ipl_rb_certificates *certs)
+static void copy_certificates_bootdata(void)
{
struct ipl_rb_certificate_entry *cert;
void *ptr;
@@ -96,10 +84,8 @@ static void copy_certificates_bootdata(struct ipl_rb_certificates *certs)
}
}
-unsigned long read_ipl_report(unsigned long safe_addr)
+int read_ipl_report(void)
{
- struct ipl_rb_certificates *certs;
- struct ipl_rb_components *comps;
struct ipl_pl_hdr *pl_hdr;
struct ipl_rl_hdr *rl_hdr;
struct ipl_rb_hdr *rb_hdr;
@@ -112,7 +98,7 @@ unsigned long read_ipl_report(unsigned long safe_addr)
*/
if (!ipl_block_valid ||
!(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR))
- return safe_addr;
+ return -1;
ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL);
/*
* There is an IPL report, to find it load the pointer to the
@@ -150,16 +136,30 @@ unsigned long read_ipl_report(unsigned long safe_addr)
* With either the component list or the certificate list
* missing the kernel will stay ignorant of secure IPL.
*/
- if (!comps || !certs)
- return safe_addr;
+ if (!comps || !certs) {
+ certs = NULL;
+ return -1;
+ }
- /*
- * Copy component and certificate list to a safe area
- * where the decompressed kernel can find them.
- */
- safe_addr = find_bootdata_space(comps, certs, safe_addr);
- copy_components_bootdata(comps);
- copy_certificates_bootdata(certs);
+ ipl_report_needs_saving = true;
+ physmem_reserve(RR_IPLREPORT, (unsigned long)pl_hdr,
+ (unsigned long)rl_end - (unsigned long)pl_hdr);
+ return 0;
+}
+
+void save_ipl_cert_comp_list(void)
+{
+ unsigned long size;
+
+ if (!ipl_report_needs_saving)
+ return;
+
+ size = get_cert_comp_list_size();
+ early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int));
+ ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size;
- return safe_addr;
+ copy_components_bootdata();
+ copy_certificates_bootdata();
+ physmem_free(RR_IPLREPORT);
+ ipl_report_needs_saving = false;
}
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index 3e3d846400b4..90602101e2ae 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -3,7 +3,7 @@
* Copyright IBM Corp. 2019
*/
#include <linux/pgtable.h>
-#include <asm/mem_detect.h>
+#include <asm/physmem_info.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/sclp.h>
@@ -91,113 +91,108 @@ static int get_random(unsigned long limit, unsigned long *value)
return 0;
}
-/*
- * To randomize kernel base address we have to consider several facts:
- * 1. physical online memory might not be continuous and have holes. mem_detect
- * info contains list of online memory ranges we should consider.
- * 2. we have several memory regions which are occupied and we should not
- * overlap and destroy them. Currently safe_addr tells us the border below
- * which all those occupied regions are. We are safe to use anything above
- * safe_addr.
- * 3. the upper limit might apply as well, even if memory above that limit is
- * online. Currently those limitations are:
- * 3.1. Limit set by "mem=" kernel command line option
- * 3.2. memory reserved at the end for kasan initialization.
- * 4. kernel base address must be aligned to THREAD_SIZE (kernel stack size).
- * Which is required for CONFIG_CHECK_STACK. Currently THREAD_SIZE is 4 pages
- * (16 pages when the kernel is built with kasan enabled)
- * Assumptions:
- * 1. kernel size (including .bss size) and upper memory limit are page aligned.
- * 2. mem_detect memory region start is THREAD_SIZE aligned / end is PAGE_SIZE
- * aligned (in practice memory configurations granularity on z/VM and LPAR
- * is 1mb).
- *
- * To guarantee uniform distribution of kernel base address among all suitable
- * addresses we generate random value just once. For that we need to build a
- * continuous range in which every value would be suitable. We can build this
- * range by simply counting all suitable addresses (let's call them positions)
- * which would be valid as kernel base address. To count positions we iterate
- * over online memory ranges. For each range which is big enough for the
- * kernel image we count all suitable addresses we can put the kernel image at
- * that is
- * (end - start - kernel_size) / THREAD_SIZE + 1
- * Two functions count_valid_kernel_positions and position_to_address help
- * to count positions in memory range given and then convert position back
- * to address.
- */
-static unsigned long count_valid_kernel_positions(unsigned long kernel_size,
- unsigned long _min,
- unsigned long _max)
+static void sort_reserved_ranges(struct reserved_range *res, unsigned long size)
{
- unsigned long start, end, pos = 0;
- int i;
-
- for_each_mem_detect_usable_block(i, &start, &end) {
- if (_min >= end)
- continue;
- if (start >= _max)
- break;
- start = max(_min, start);
- end = min(_max, end);
- if (end - start < kernel_size)
- continue;
- pos += (end - start - kernel_size) / THREAD_SIZE + 1;
+ struct reserved_range tmp;
+ int i, j;
+
+ for (i = 1; i < size; i++) {
+ tmp = res[i];
+ for (j = i - 1; j >= 0 && res[j].start > tmp.start; j--)
+ res[j + 1] = res[j];
+ res[j + 1] = tmp;
}
-
- return pos;
}
-static unsigned long position_to_address(unsigned long pos, unsigned long kernel_size,
- unsigned long _min, unsigned long _max)
+static unsigned long iterate_valid_positions(unsigned long size, unsigned long align,
+ unsigned long _min, unsigned long _max,
+ struct reserved_range *res, size_t res_count,
+ bool pos_count, unsigned long find_pos)
{
- unsigned long start, end;
+ unsigned long start, end, tmp_end, range_pos, pos = 0;
+ struct reserved_range *res_end = res + res_count;
+ struct reserved_range *skip_res;
int i;
- for_each_mem_detect_usable_block(i, &start, &end) {
+ align = max(align, 8UL);
+ _min = round_up(_min, align);
+ for_each_physmem_usable_range(i, &start, &end) {
if (_min >= end)
continue;
+ start = round_up(start, align);
if (start >= _max)
break;
start = max(_min, start);
end = min(_max, end);
- if (end - start < kernel_size)
- continue;
- if ((end - start - kernel_size) / THREAD_SIZE + 1 >= pos)
- return start + (pos - 1) * THREAD_SIZE;
- pos -= (end - start - kernel_size) / THREAD_SIZE + 1;
+
+ while (start + size <= end) {
+ /* skip reserved ranges below the start */
+ while (res && res->end <= start) {
+ res++;
+ if (res >= res_end)
+ res = NULL;
+ }
+ skip_res = NULL;
+ tmp_end = end;
+ /* has intersecting reserved range */
+ if (res && res->start < end) {
+ skip_res = res;
+ tmp_end = res->start;
+ }
+ if (start + size <= tmp_end) {
+ range_pos = (tmp_end - start - size) / align + 1;
+ if (pos_count) {
+ pos += range_pos;
+ } else {
+ if (range_pos >= find_pos)
+ return start + (find_pos - 1) * align;
+ find_pos -= range_pos;
+ }
+ }
+ if (!skip_res)
+ break;
+ start = round_up(skip_res->end, align);
+ }
}
- return 0;
+ return pos_count ? pos : 0;
}
-unsigned long get_random_base(unsigned long safe_addr)
+/*
+ * Two types of decompressor memory allocations/reserves are considered
+ * differently.
+ *
+ * "Static" or "single" allocations are done via physmem_alloc_range() and
+ * physmem_reserve(), and they are listed in physmem_info.reserved[]. Each
+ * type of "static" allocation can only have one allocation per type and
+ * cannot have chains.
+ *
+ * On the other hand, "dynamic" or "repetitive" allocations are done via
+ * physmem_alloc_top_down(). These allocations are tightly packed together
+ * top down from the end of online memory. physmem_alloc_pos represents
+ * current position where those allocations start.
+ *
+ * Functions randomize_within_range() and iterate_valid_positions()
+ * only consider "dynamic" allocations by never looking above
+ * physmem_alloc_pos. "Static" allocations, however, are explicitly
+ * considered by checking the "res" (reserves) array. The first
+ * reserved_range of a "dynamic" allocation may also be checked along the
+ * way, but it will always be above the maximum value anyway.
+ */
+unsigned long randomize_within_range(unsigned long size, unsigned long align,
+ unsigned long min, unsigned long max)
{
- unsigned long usable_total = get_mem_detect_usable_total();
- unsigned long memory_limit = get_mem_detect_end();
- unsigned long base_pos, max_pos, kernel_size;
- int i;
-
- /*
- * Avoid putting kernel in the end of physical memory
- * which vmem and kasan code will use for shadow memory and
- * pgtable mapping allocations.
- */
- memory_limit -= kasan_estimate_memory_needs(usable_total);
- memory_limit -= vmem_estimate_memory_needs(usable_total);
+ struct reserved_range res[RR_MAX];
+ unsigned long max_pos, pos;
- safe_addr = ALIGN(safe_addr, THREAD_SIZE);
- kernel_size = vmlinux.image_size + vmlinux.bss_size;
- if (safe_addr + kernel_size > memory_limit)
- return 0;
+ memcpy(res, physmem_info.reserved, sizeof(res));
+ sort_reserved_ranges(res, ARRAY_SIZE(res));
+ max = min(max, get_physmem_alloc_pos());
- max_pos = count_valid_kernel_positions(kernel_size, safe_addr, memory_limit);
- if (!max_pos) {
- sclp_early_printk("KASLR disabled: not enough memory\n");
+ max_pos = iterate_valid_positions(size, align, min, max, res, ARRAY_SIZE(res), true, 0);
+ if (!max_pos)
return 0;
- }
-
- /* we need a value in the range [1, base_pos] inclusive */
- if (get_random(max_pos, &base_pos))
+ if (get_random(max_pos, &pos))
return 0;
- return position_to_address(base_pos + 1, kernel_size, safe_addr, memory_limit);
+ return iterate_valid_positions(size, align, min, max, res, ARRAY_SIZE(res), false, pos + 1);
}
diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c
deleted file mode 100644
index 35f4ba11f7fd..000000000000
--- a/arch/s390/boot/mem_detect.c
+++ /dev/null
@@ -1,191 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <asm/setup.h>
-#include <asm/processor.h>
-#include <asm/sclp.h>
-#include <asm/sections.h>
-#include <asm/mem_detect.h>
-#include <asm/sparsemem.h>
-#include "decompressor.h"
-#include "boot.h"
-
-struct mem_detect_info __bootdata(mem_detect);
-
-/* up to 256 storage elements, 1020 subincrements each */
-#define ENTRIES_EXTENDED_MAX \
- (256 * (1020 / 2) * sizeof(struct mem_detect_block))
-
-static struct mem_detect_block *__get_mem_detect_block_ptr(u32 n)
-{
- if (n < MEM_INLINED_ENTRIES)
- return &mem_detect.entries[n];
- return &mem_detect.entries_extended[n - MEM_INLINED_ENTRIES];
-}
-
-/*
- * sequential calls to add_mem_detect_block with adjacent memory areas
- * are merged together into single memory block.
- */
-void add_mem_detect_block(u64 start, u64 end)
-{
- struct mem_detect_block *block;
-
- if (mem_detect.count) {
- block = __get_mem_detect_block_ptr(mem_detect.count - 1);
- if (block->end == start) {
- block->end = end;
- return;
- }
- }
-
- block = __get_mem_detect_block_ptr(mem_detect.count);
- block->start = start;
- block->end = end;
- mem_detect.count++;
-}
-
-static int __diag260(unsigned long rx1, unsigned long rx2)
-{
- unsigned long reg1, reg2, ry;
- union register_pair rx;
- psw_t old;
- int rc;
-
- rx.even = rx1;
- rx.odd = rx2;
- ry = 0x10; /* storage configuration */
- rc = -1; /* fail */
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
- " diag %[rx],%[ry],0x260\n"
- " ipm %[rc]\n"
- " srl %[rc],28\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
- : [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- [rc] "+&d" (rc),
- [ry] "+&d" (ry),
- "+Q" (S390_lowcore.program_new_psw),
- "=Q" (old)
- : [rx] "d" (rx.pair),
- [psw_old] "a" (&old),
- [psw_pgm] "a" (&S390_lowcore.program_new_psw)
- : "cc", "memory");
- return rc == 0 ? ry : -1;
-}
-
-static int diag260(void)
-{
- int rc, i;
-
- struct {
- unsigned long start;
- unsigned long end;
- } storage_extents[8] __aligned(16); /* VM supports up to 8 extends */
-
- memset(storage_extents, 0, sizeof(storage_extents));
- rc = __diag260((unsigned long)storage_extents, sizeof(storage_extents));
- if (rc == -1)
- return -1;
-
- for (i = 0; i < min_t(int, rc, ARRAY_SIZE(storage_extents)); i++)
- add_mem_detect_block(storage_extents[i].start, storage_extents[i].end + 1);
- return 0;
-}
-
-static int tprot(unsigned long addr)
-{
- unsigned long reg1, reg2;
- int rc = -EFAULT;
- psw_t old;
-
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
- " tprot 0(%[addr]),0\n"
- " ipm %[rc]\n"
- " srl %[rc],28\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
- : [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- [rc] "+&d" (rc),
- "=Q" (S390_lowcore.program_new_psw.addr),
- "=Q" (old)
- : [psw_old] "a" (&old),
- [psw_pgm] "a" (&S390_lowcore.program_new_psw),
- [addr] "a" (addr)
- : "cc", "memory");
- return rc;
-}
-
-static unsigned long search_mem_end(void)
-{
- unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */
- unsigned long offset = 0;
- unsigned long pivot;
-
- while (range > 1) {
- range >>= 1;
- pivot = offset + range;
- if (!tprot(pivot << 20))
- offset = pivot;
- }
- return (offset + 1) << 20;
-}
-
-unsigned long detect_memory(unsigned long *safe_addr)
-{
- unsigned long max_physmem_end = 0;
-
- sclp_early_get_memsize(&max_physmem_end);
- mem_detect.entries_extended = (struct mem_detect_block *)ALIGN(*safe_addr, sizeof(u64));
-
- if (!sclp_early_read_storage_info()) {
- mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO;
- } else if (!diag260()) {
- mem_detect.info_source = MEM_DETECT_DIAG260;
- max_physmem_end = max_physmem_end ?: get_mem_detect_end();
- } else if (max_physmem_end) {
- add_mem_detect_block(0, max_physmem_end);
- mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO;
- } else {
- max_physmem_end = search_mem_end();
- add_mem_detect_block(0, max_physmem_end);
- mem_detect.info_source = MEM_DETECT_BIN_SEARCH;
- }
-
- if (mem_detect.count > MEM_INLINED_ENTRIES) {
- *safe_addr += (mem_detect.count - MEM_INLINED_ENTRIES) *
- sizeof(struct mem_detect_block);
- }
-
- return max_physmem_end;
-}
-
-void mem_detect_set_usable_limit(unsigned long limit)
-{
- struct mem_detect_block *block;
- int i;
-
- /* make sure mem_detect.usable ends up within online memory block */
- for (i = 0; i < mem_detect.count; i++) {
- block = __get_mem_detect_block_ptr(i);
- if (block->start >= limit)
- break;
- if (block->end >= limit) {
- mem_detect.usable = limit;
- break;
- }
- mem_detect.usable = block->end;
- }
-}
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
index c2a1defc79da..97244cd7a206 100644
--- a/arch/s390/boot/pgm_check_info.c
+++ b/arch/s390/boot/pgm_check_info.c
@@ -123,11 +123,10 @@ out:
sclp_early_printk(buf);
}
-static noinline void print_stacktrace(void)
+void print_stacktrace(unsigned long sp)
{
struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start,
(unsigned long)_stack_end };
- unsigned long sp = S390_lowcore.gpregs_save_area[15];
bool first = true;
decompressor_printk("Call Trace:\n");
@@ -154,7 +153,7 @@ void print_pgm_check_info(void)
decompressor_printk("Kernel command line: %s\n", early_command_line);
decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n",
S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1);
- if (kaslr_enabled)
+ if (kaslr_enabled())
decompressor_printk("Kernel random base: %lx\n", __kaslr_offset);
decompressor_printk("PSW : %016lx %016lx (%pS)\n",
S390_lowcore.psw_save_area.mask,
@@ -173,7 +172,7 @@ void print_pgm_check_info(void)
gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
decompressor_printk(" %016lx %016lx %016lx %016lx\n",
gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
- print_stacktrace();
+ print_stacktrace(S390_lowcore.gpregs_save_area[15]);
decompressor_printk("Last Breaking-Event-Address:\n");
decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break,
(void *)S390_lowcore.pgm_last_break);
diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c
new file mode 100644
index 000000000000..0cf79826eef9
--- /dev/null
+++ b/arch/s390/boot/physmem_info.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/processor.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <asm/physmem_info.h>
+#include <asm/stacktrace.h>
+#include <asm/boot_data.h>
+#include <asm/sparsemem.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/sclp.h>
+#include <asm/uv.h>
+#include "decompressor.h"
+#include "boot.h"
+
+struct physmem_info __bootdata(physmem_info);
+static unsigned int physmem_alloc_ranges;
+static unsigned long physmem_alloc_pos;
+
+/* up to 256 storage elements, 1020 subincrements each */
+#define ENTRIES_EXTENDED_MAX \
+ (256 * (1020 / 2) * sizeof(struct physmem_range))
+
+static struct physmem_range *__get_physmem_range_ptr(u32 n)
+{
+ if (n < MEM_INLINED_ENTRIES)
+ return &physmem_info.online[n];
+ if (unlikely(!physmem_info.online_extended)) {
+ physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range(
+ RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
+ physmem_alloc_pos, true);
+ }
+ return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES];
+}
+
+/*
+ * sequential calls to add_physmem_online_range with adjacent memory ranges
+ * are merged together into single memory range.
+ */
+void add_physmem_online_range(u64 start, u64 end)
+{
+ struct physmem_range *range;
+
+ if (physmem_info.range_count) {
+ range = __get_physmem_range_ptr(physmem_info.range_count - 1);
+ if (range->end == start) {
+ range->end = end;
+ return;
+ }
+ }
+
+ range = __get_physmem_range_ptr(physmem_info.range_count);
+ range->start = start;
+ range->end = end;
+ physmem_info.range_count++;
+}
+
+static int __diag260(unsigned long rx1, unsigned long rx2)
+{
+ unsigned long reg1, reg2, ry;
+ union register_pair rx;
+ psw_t old;
+ int rc;
+
+ rx.even = rx1;
+ rx.odd = rx2;
+ ry = 0x10; /* storage configuration */
+ rc = -1; /* fail */
+ asm volatile(
+ " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
+ " epsw %[reg1],%[reg2]\n"
+ " st %[reg1],0(%[psw_pgm])\n"
+ " st %[reg2],4(%[psw_pgm])\n"
+ " larl %[reg1],1f\n"
+ " stg %[reg1],8(%[psw_pgm])\n"
+ " diag %[rx],%[ry],0x260\n"
+ " ipm %[rc]\n"
+ " srl %[rc],28\n"
+ "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
+ : [reg1] "=&d" (reg1),
+ [reg2] "=&a" (reg2),
+ [rc] "+&d" (rc),
+ [ry] "+&d" (ry),
+ "+Q" (S390_lowcore.program_new_psw),
+ "=Q" (old)
+ : [rx] "d" (rx.pair),
+ [psw_old] "a" (&old),
+ [psw_pgm] "a" (&S390_lowcore.program_new_psw)
+ : "cc", "memory");
+ return rc == 0 ? ry : -1;
+}
+
+static int diag260(void)
+{
+ int rc, i;
+
+ struct {
+ unsigned long start;
+ unsigned long end;
+ } storage_extents[8] __aligned(16); /* VM supports up to 8 extends */
+
+ memset(storage_extents, 0, sizeof(storage_extents));
+ rc = __diag260((unsigned long)storage_extents, sizeof(storage_extents));
+ if (rc == -1)
+ return -1;
+
+ for (i = 0; i < min_t(int, rc, ARRAY_SIZE(storage_extents)); i++)
+ add_physmem_online_range(storage_extents[i].start, storage_extents[i].end + 1);
+ return 0;
+}
+
+static int tprot(unsigned long addr)
+{
+ unsigned long reg1, reg2;
+ int rc = -EFAULT;
+ psw_t old;
+
+ asm volatile(
+ " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
+ " epsw %[reg1],%[reg2]\n"
+ " st %[reg1],0(%[psw_pgm])\n"
+ " st %[reg2],4(%[psw_pgm])\n"
+ " larl %[reg1],1f\n"
+ " stg %[reg1],8(%[psw_pgm])\n"
+ " tprot 0(%[addr]),0\n"
+ " ipm %[rc]\n"
+ " srl %[rc],28\n"
+ "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
+ : [reg1] "=&d" (reg1),
+ [reg2] "=&a" (reg2),
+ [rc] "+&d" (rc),
+ "=Q" (S390_lowcore.program_new_psw.addr),
+ "=Q" (old)
+ : [psw_old] "a" (&old),
+ [psw_pgm] "a" (&S390_lowcore.program_new_psw),
+ [addr] "a" (addr)
+ : "cc", "memory");
+ return rc;
+}
+
+static unsigned long search_mem_end(void)
+{
+ unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */
+ unsigned long offset = 0;
+ unsigned long pivot;
+
+ while (range > 1) {
+ range >>= 1;
+ pivot = offset + range;
+ if (!tprot(pivot << 20))
+ offset = pivot;
+ }
+ return (offset + 1) << 20;
+}
+
+unsigned long detect_max_physmem_end(void)
+{
+ unsigned long max_physmem_end = 0;
+
+ if (!sclp_early_get_memsize(&max_physmem_end)) {
+ physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO;
+ } else {
+ max_physmem_end = search_mem_end();
+ physmem_info.info_source = MEM_DETECT_BIN_SEARCH;
+ }
+ return max_physmem_end;
+}
+
+void detect_physmem_online_ranges(unsigned long max_physmem_end)
+{
+ if (!sclp_early_read_storage_info()) {
+ physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO;
+ } else if (!diag260()) {
+ physmem_info.info_source = MEM_DETECT_DIAG260;
+ } else if (max_physmem_end) {
+ add_physmem_online_range(0, max_physmem_end);
+ }
+}
+
+void physmem_set_usable_limit(unsigned long limit)
+{
+ physmem_info.usable = limit;
+ physmem_alloc_pos = limit;
+}
+
+static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max)
+{
+ unsigned long start, end, total_mem = 0, total_reserved_mem = 0;
+ struct reserved_range *range;
+ enum reserved_range_type t;
+ int i;
+
+ decompressor_printk("Linux version %s\n", kernel_version);
+ if (!is_prot_virt_guest() && early_command_line[0])
+ decompressor_printk("Kernel command line: %s\n", early_command_line);
+ decompressor_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n",
+ size, align, min, max);
+ decompressor_printk("Reserved memory ranges:\n");
+ for_each_physmem_reserved_range(t, range, &start, &end) {
+ decompressor_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
+ total_reserved_mem += end - start;
+ }
+ decompressor_printk("Usable online memory ranges (info source: %s [%x]):\n",
+ get_physmem_info_source(), physmem_info.info_source);
+ for_each_physmem_usable_range(i, &start, &end) {
+ decompressor_printk("%016lx %016lx\n", start, end);
+ total_mem += end - start;
+ }
+ decompressor_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n",
+ total_mem, total_reserved_mem,
+ total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
+ print_stacktrace(current_frame_address());
+ sclp_early_printk("\n\n -- System halted\n");
+ disabled_wait();
+}
+
+void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+{
+ physmem_info.reserved[type].start = addr;
+ physmem_info.reserved[type].end = addr + size;
+}
+
+void physmem_free(enum reserved_range_type type)
+{
+ physmem_info.reserved[type].start = 0;
+ physmem_info.reserved[type].end = 0;
+}
+
+static bool __physmem_alloc_intersects(unsigned long addr, unsigned long size,
+ unsigned long *intersection_start)
+{
+ unsigned long res_addr, res_size;
+ int t;
+
+ for (t = 0; t < RR_MAX; t++) {
+ if (!get_physmem_reserved(t, &res_addr, &res_size))
+ continue;
+ if (intersects(addr, size, res_addr, res_size)) {
+ *intersection_start = res_addr;
+ return true;
+ }
+ }
+ return ipl_report_certs_intersects(addr, size, intersection_start);
+}
+
+static unsigned long __physmem_alloc_range(unsigned long size, unsigned long align,
+ unsigned long min, unsigned long max,
+ unsigned int from_ranges, unsigned int *ranges_left,
+ bool die_on_oom)
+{
+ unsigned int nranges = from_ranges ?: physmem_info.range_count;
+ unsigned long range_start, range_end;
+ unsigned long intersection_start;
+ unsigned long addr, pos = max;
+
+ align = max(align, 8UL);
+ while (nranges) {
+ __get_physmem_range(nranges - 1, &range_start, &range_end, false);
+ pos = min(range_end, pos);
+
+ if (round_up(min, align) + size > pos)
+ break;
+ addr = round_down(pos - size, align);
+ if (range_start > addr) {
+ nranges--;
+ continue;
+ }
+ if (__physmem_alloc_intersects(addr, size, &intersection_start)) {
+ pos = intersection_start;
+ continue;
+ }
+
+ if (ranges_left)
+ *ranges_left = nranges;
+ return addr;
+ }
+ if (die_on_oom)
+ die_oom(size, align, min, max);
+ return 0;
+}
+
+unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
+ unsigned long align, unsigned long min, unsigned long max,
+ bool die_on_oom)
+{
+ unsigned long addr;
+
+ max = min(max, physmem_alloc_pos);
+ addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom);
+ if (addr)
+ physmem_reserve(type, addr, size);
+ return addr;
+}
+
+unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
+ unsigned long align)
+{
+ struct reserved_range *range = &physmem_info.reserved[type];
+ struct reserved_range *new_range;
+ unsigned int ranges_left;
+ unsigned long addr;
+
+ addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges,
+ &ranges_left, true);
+ /* if not a consecutive allocation of the same type or first allocation */
+ if (range->start != addr + size) {
+ if (range->end) {
+ physmem_alloc_pos = __physmem_alloc_range(
+ sizeof(struct reserved_range), 0, 0, physmem_alloc_pos,
+ physmem_alloc_ranges, &ranges_left, true);
+ new_range = (struct reserved_range *)physmem_alloc_pos;
+ *new_range = *range;
+ range->chain = new_range;
+ addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos,
+ ranges_left, &ranges_left, true);
+ }
+ range->end = addr + size;
+ }
+ range->start = addr;
+ physmem_alloc_pos = addr;
+ physmem_alloc_ranges = ranges_left;
+ return addr;
+}
+
+unsigned long get_physmem_alloc_pos(void)
+{
+ return physmem_alloc_pos;
+}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 11413f0baabc..64bd7ac3e35d 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -12,7 +12,7 @@
#include <asm/diag.h>
#include <asm/uv.h>
#include <asm/abs_lowcore.h>
-#include <asm/mem_detect.h>
+#include <asm/physmem_info.h>
#include "decompressor.h"
#include "boot.h"
#include "uv.h"
@@ -21,7 +21,6 @@ unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata_preserved(__abs_lowcore);
unsigned long __bootdata_preserved(__memcpy_real_area);
pte_t *__bootdata_preserved(memcpy_real_ptep);
-unsigned long __bootdata(__amode31_base);
unsigned long __bootdata_preserved(VMALLOC_START);
unsigned long __bootdata_preserved(VMALLOC_END);
struct page *__bootdata_preserved(vmemmap);
@@ -29,8 +28,6 @@ unsigned long __bootdata_preserved(vmemmap_size);
unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata(ident_map_size);
-int __bootdata(is_full_image) = 1;
-struct initrd_data __bootdata(initrd_data);
u64 __bootdata_preserved(stfle_fac_list[16]);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
@@ -76,17 +73,20 @@ unsigned long mem_safe_offset(void)
}
#endif
-static unsigned long rescue_initrd(unsigned long safe_addr)
+static void rescue_initrd(unsigned long min, unsigned long max)
{
+ unsigned long old_addr, addr, size;
+
if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
- return safe_addr;
- if (!initrd_data.start || !initrd_data.size)
- return safe_addr;
- if (initrd_data.start < safe_addr) {
- memmove((void *)safe_addr, (void *)initrd_data.start, initrd_data.size);
- initrd_data.start = safe_addr;
- }
- return initrd_data.start + initrd_data.size;
+ return;
+ if (!get_physmem_reserved(RR_INITRD, &addr, &size))
+ return;
+ if (addr >= min && addr + size <= max)
+ return;
+ old_addr = addr;
+ physmem_free(RR_INITRD);
+ addr = physmem_alloc_top_down(RR_INITRD, size, 0);
+ memmove((void *)addr, (void *)old_addr, size);
}
static void copy_bootdata(void)
@@ -140,7 +140,7 @@ static void handle_relocs(unsigned long offset)
*
* Consider the following factors:
* 1. max_physmem_end - end of physical memory online or standby.
- * Always <= end of the last online memory block (get_mem_detect_end()).
+ * Always >= end of the last online memory range (get_physmem_online_end()).
* 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the
* kernel is able to support.
* 3. "mem=" kernel command line option which limits physical memory usage.
@@ -160,10 +160,10 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
#ifdef CONFIG_CRASH_DUMP
if (oldmem_data.start) {
- kaslr_enabled = 0;
+ __kaslr_enabled = 0;
ident_map_size = min(ident_map_size, oldmem_data.size);
} else if (ipl_block_valid && is_ipl_block_dump()) {
- kaslr_enabled = 0;
+ __kaslr_enabled = 0;
if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size)
ident_map_size = min(ident_map_size, hsa_size);
}
@@ -235,9 +235,9 @@ static unsigned long setup_kernel_memory_layout(void)
/*
* This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's.
*/
-static void clear_bss_section(void)
+static void clear_bss_section(unsigned long vmlinux_lma)
{
- memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size);
+ memset((void *)vmlinux_lma + vmlinux.image_size, 0, vmlinux.bss_size);
}
/*
@@ -256,7 +256,6 @@ static void setup_vmalloc_size(void)
static void offset_vmlinux_info(unsigned long offset)
{
- vmlinux.default_lma += offset;
*(unsigned long *)(&vmlinux.entry) += offset;
vmlinux.bootdata_off += offset;
vmlinux.bootdata_preserved_off += offset;
@@ -266,60 +265,83 @@ static void offset_vmlinux_info(unsigned long offset)
vmlinux.init_mm_off += offset;
vmlinux.swapper_pg_dir_off += offset;
vmlinux.invalid_pg_dir_off += offset;
-}
-
-static unsigned long reserve_amode31(unsigned long safe_addr)
-{
- __amode31_base = PAGE_ALIGN(safe_addr);
- return __amode31_base + vmlinux.amode31_size;
+#ifdef CONFIG_KASAN
+ vmlinux.kasan_early_shadow_page_off += offset;
+ vmlinux.kasan_early_shadow_pte_off += offset;
+ vmlinux.kasan_early_shadow_pmd_off += offset;
+ vmlinux.kasan_early_shadow_pud_off += offset;
+ vmlinux.kasan_early_shadow_p4d_off += offset;
+#endif
}
void startup_kernel(void)
{
unsigned long max_physmem_end;
- unsigned long random_lma;
- unsigned long safe_addr;
+ unsigned long vmlinux_lma = 0;
+ unsigned long amode31_lma = 0;
unsigned long asce_limit;
+ unsigned long safe_addr;
void *img;
psw_t psw;
- initrd_data.start = parmarea.initrd_start;
- initrd_data.size = parmarea.initrd_size;
+ setup_lpp();
+ safe_addr = mem_safe_offset();
+ /*
+ * reserve decompressor memory together with decompression heap, buffer and
+ * memory which might be occupied by uncompressed kernel at default 1Mb
+ * position (if KASLR is off or failed).
+ */
+ physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr);
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size)
+ physmem_reserve(RR_INITRD, parmarea.initrd_start, parmarea.initrd_size);
oldmem_data.start = parmarea.oldmem_base;
oldmem_data.size = parmarea.oldmem_size;
- setup_lpp();
store_ipl_parmblock();
- safe_addr = mem_safe_offset();
- safe_addr = reserve_amode31(safe_addr);
- safe_addr = read_ipl_report(safe_addr);
+ read_ipl_report();
uv_query_info();
- safe_addr = rescue_initrd(safe_addr);
sclp_early_read_info();
setup_boot_command_line();
parse_boot_command_line();
detect_facilities();
sanitize_prot_virt_host();
- max_physmem_end = detect_memory(&safe_addr);
+ max_physmem_end = detect_max_physmem_end();
setup_ident_map_size(max_physmem_end);
setup_vmalloc_size();
asce_limit = setup_kernel_memory_layout();
- mem_detect_set_usable_limit(ident_map_size);
-
- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
- random_lma = get_random_base(safe_addr);
- if (random_lma) {
- __kaslr_offset = random_lma - vmlinux.default_lma;
- img = (void *)vmlinux.default_lma;
+ /* got final ident_map_size, physmem allocations could be performed now */
+ physmem_set_usable_limit(ident_map_size);
+ detect_physmem_online_ranges(max_physmem_end);
+ save_ipl_cert_comp_list();
+ rescue_initrd(safe_addr, ident_map_size);
+
+ if (kaslr_enabled()) {
+ vmlinux_lma = randomize_within_range(vmlinux.image_size + vmlinux.bss_size,
+ THREAD_SIZE, vmlinux.default_lma,
+ ident_map_size);
+ if (vmlinux_lma) {
+ __kaslr_offset = vmlinux_lma - vmlinux.default_lma;
offset_vmlinux_info(__kaslr_offset);
}
}
+ vmlinux_lma = vmlinux_lma ?: vmlinux.default_lma;
+ physmem_reserve(RR_VMLINUX, vmlinux_lma, vmlinux.image_size + vmlinux.bss_size);
if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
img = decompress_kernel();
- memmove((void *)vmlinux.default_lma, img, vmlinux.image_size);
- } else if (__kaslr_offset)
- memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size);
+ memmove((void *)vmlinux_lma, img, vmlinux.image_size);
+ } else if (__kaslr_offset) {
+ img = (void *)vmlinux.default_lma;
+ memmove((void *)vmlinux_lma, img, vmlinux.image_size);
+ memset(img, 0, vmlinux.image_size);
+ }
+
+ /* vmlinux decompression is done, shrink reserved low memory */
+ physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);
+ if (kaslr_enabled())
+ amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G);
+ amode31_lma = amode31_lma ?: vmlinux.default_lma - vmlinux.amode31_size;
+ physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size);
/*
* The order of the following operations is important:
@@ -334,21 +356,16 @@ void startup_kernel(void)
* - copy_bootdata() must follow setup_vmem() to propagate changes to
* bootdata made by setup_vmem()
*/
- clear_bss_section();
+ clear_bss_section(vmlinux_lma);
handle_relocs(__kaslr_offset);
setup_vmem(asce_limit);
copy_bootdata();
- if (__kaslr_offset) {
- /*
- * Save KASLR offset for early dumps, before vmcore_info is set.
- * Mark as uneven to distinguish from real vmcore_info pointer.
- */
- S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL;
- /* Clear non-relocated kernel */
- if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
- memset(img, 0, vmlinux.image_size);
- }
+ /*
+ * Save KASLR offset for early dumps, before vmcore_info is set.
+ * Mark as uneven to distinguish from real vmcore_info pointer.
+ */
+ S390_lowcore.vmcore_info = __kaslr_offset ? __kaslr_offset | 0x1UL : 0;
/*
* Jump to the decompressed kernel entry point and switch DAT mode on.
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 4d1d0d8e99cb..acb1f8b53105 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -1,81 +1,217 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/sched/task.h>
#include <linux/pgtable.h>
+#include <linux/kasan.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
-#include <asm/mem_detect.h>
+#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"
+unsigned long __bootdata_preserved(s390_invalid_asce);
+
+#ifdef CONFIG_PROC_FS
+atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
+#endif
+
#define init_mm (*(struct mm_struct *)vmlinux.init_mm_off)
#define swapper_pg_dir vmlinux.swapper_pg_dir_off
#define invalid_pg_dir vmlinux.invalid_pg_dir_off
-/*
- * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though.
- */
-static inline pte_t *__virt_to_kpte(unsigned long va)
-{
- return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
-}
-
-unsigned long __bootdata_preserved(s390_invalid_asce);
-unsigned long __bootdata(pgalloc_pos);
-unsigned long __bootdata(pgalloc_end);
-unsigned long __bootdata(pgalloc_low);
-
enum populate_mode {
POPULATE_NONE,
- POPULATE_ONE2ONE,
+ POPULATE_DIRECT,
POPULATE_ABS_LOWCORE,
+#ifdef CONFIG_KASAN
+ POPULATE_KASAN_MAP_SHADOW,
+ POPULATE_KASAN_ZERO_SHADOW,
+ POPULATE_KASAN_SHALLOW
+#endif
};
-static void boot_check_oom(void)
+static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);
+
+#ifdef CONFIG_KASAN
+
+#define kasan_early_shadow_page vmlinux.kasan_early_shadow_page_off
+#define kasan_early_shadow_pte ((pte_t *)vmlinux.kasan_early_shadow_pte_off)
+#define kasan_early_shadow_pmd ((pmd_t *)vmlinux.kasan_early_shadow_pmd_off)
+#define kasan_early_shadow_pud ((pud_t *)vmlinux.kasan_early_shadow_pud_off)
+#define kasan_early_shadow_p4d ((p4d_t *)vmlinux.kasan_early_shadow_p4d_off)
+#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
+
+static pte_t pte_z;
+
+static void kasan_populate_shadow(void)
+{
+ pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
+ pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
+ p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
+ unsigned long untracked_end;
+ unsigned long start, end;
+ int i;
+
+ pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
+ if (!machine.has_nx)
+ pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC));
+ crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
+ crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
+ crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
+ memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
+
+ /*
+ * Current memory layout:
+ * +- 0 -------------+ +- shadow start -+
+ * |1:1 ident mapping| /|1/8 of ident map|
+ * | | / | |
+ * +-end of ident map+ / +----------------+
+ * | ... gap ... | / | kasan |
+ * | | / | zero page |
+ * +- vmalloc area -+ / | mapping |
+ * | vmalloc_size | / | (untracked) |
+ * +- modules vaddr -+ / +----------------+
+ * | 2Gb |/ | unmapped | allocated per module
+ * +- shadow start -+ +----------------+
+ * | 1/8 addr space | | zero pg mapping| (untracked)
+ * +- shadow end ----+---------+- shadow end ---+
+ *
+ * Current memory layout (KASAN_VMALLOC):
+ * +- 0 -------------+ +- shadow start -+
+ * |1:1 ident mapping| /|1/8 of ident map|
+ * | | / | |
+ * +-end of ident map+ / +----------------+
+ * | ... gap ... | / | kasan zero page| (untracked)
+ * | | / | mapping |
+ * +- vmalloc area -+ / +----------------+
+ * | vmalloc_size | / |shallow populate|
+ * +- modules vaddr -+ / +----------------+
+ * | 2Gb |/ |shallow populate|
+ * +- shadow start -+ +----------------+
+ * | 1/8 addr space | | zero pg mapping| (untracked)
+ * +- shadow end ----+---------+- shadow end ---+
+ */
+
+ for_each_physmem_usable_range(i, &start, &end)
+ pgtable_populate(__sha(start), __sha(end), POPULATE_KASAN_MAP_SHADOW);
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+ untracked_end = VMALLOC_START;
+ /* shallowly populate kasan shadow for vmalloc and modules */
+ pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END), POPULATE_KASAN_SHALLOW);
+ } else {
+ untracked_end = MODULES_VADDR;
+ }
+ /* populate kasan shadow for untracked memory */
+ pgtable_populate(__sha(ident_map_size), __sha(untracked_end), POPULATE_KASAN_ZERO_SHADOW);
+ pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE), POPULATE_KASAN_ZERO_SHADOW);
+}
+
+static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
+ unsigned long end, enum populate_mode mode)
{
- if (pgalloc_pos < pgalloc_low)
- error("out of memory on boot\n");
+ if (mode == POPULATE_KASAN_ZERO_SHADOW &&
+ IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
+ pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d);
+ return true;
+ }
+ return false;
}
-static void pgtable_populate_init(void)
+static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
+ unsigned long end, enum populate_mode mode)
{
- unsigned long initrd_end;
- unsigned long kernel_end;
-
- kernel_end = vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
- pgalloc_low = round_up(kernel_end, PAGE_SIZE);
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
- initrd_end = round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
- pgalloc_low = max(pgalloc_low, initrd_end);
+ if (mode == POPULATE_KASAN_ZERO_SHADOW &&
+ IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) {
+ p4d_populate(&init_mm, p4d, kasan_early_shadow_pud);
+ return true;
}
+ return false;
+}
- pgalloc_end = round_down(get_mem_detect_end(), PAGE_SIZE);
- pgalloc_pos = pgalloc_end;
+static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
+ unsigned long end, enum populate_mode mode)
+{
+ if (mode == POPULATE_KASAN_ZERO_SHADOW &&
+ IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) {
+ pud_populate(&init_mm, pud, kasan_early_shadow_pmd);
+ return true;
+ }
+ return false;
+}
- boot_check_oom();
+static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
+ unsigned long end, enum populate_mode mode)
+{
+ if (mode == POPULATE_KASAN_ZERO_SHADOW &&
+ IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
+ pmd_populate(&init_mm, pmd, kasan_early_shadow_pte);
+ return true;
+ }
+ return false;
}
-static void *boot_alloc_pages(unsigned int order)
+static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
- unsigned long size = PAGE_SIZE << order;
+ pte_t entry;
- pgalloc_pos -= size;
- pgalloc_pos = round_down(pgalloc_pos, size);
+ if (mode == POPULATE_KASAN_ZERO_SHADOW) {
+ set_pte(pte, pte_z);
+ return true;
+ }
+ return false;
+}
+#else
- boot_check_oom();
+static inline void kasan_populate_shadow(void) {}
- return (void *)pgalloc_pos;
+static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
+ unsigned long end, enum populate_mode mode)
+{
+ return false;
+}
+
+static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
+ unsigned long end, enum populate_mode mode)
+{
+ return false;
+}
+
+static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
+ unsigned long end, enum populate_mode mode)
+{
+ return false;
+}
+
+static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
+ unsigned long end, enum populate_mode mode)
+{
+ return false;
+}
+
+static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
+{
+ return false;
+}
+
+#endif
+
+/*
+ * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though.
+ */
+static inline pte_t *__virt_to_kpte(unsigned long va)
+{
+ return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
}
static void *boot_crst_alloc(unsigned long val)
{
+ unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
unsigned long *table;
- table = boot_alloc_pages(CRST_ALLOC_ORDER);
- if (table)
- crst_table_init(table, val);
+ table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
+ crst_table_init(table, val);
return table;
}
@@ -84,28 +220,37 @@ static pte_t *boot_pte_alloc(void)
static void *pte_leftover;
pte_t *pte;
- BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE);
-
+ /*
+ * handling pte_leftovers this way helps to avoid memory fragmentation
+ * during POPULATE_KASAN_MAP_SHADOW when EDAT is off
+ */
if (!pte_leftover) {
- pte_leftover = boot_alloc_pages(0);
+ pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
pte = pte_leftover + _PAGE_TABLE_SIZE;
} else {
pte = pte_leftover;
pte_leftover = NULL;
}
+
memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
return pte;
}
-static unsigned long _pa(unsigned long addr, enum populate_mode mode)
+static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode)
{
switch (mode) {
case POPULATE_NONE:
return -1;
- case POPULATE_ONE2ONE:
+ case POPULATE_DIRECT:
return addr;
case POPULATE_ABS_LOWCORE:
return __abs_lowcore_pa(addr);
+#ifdef CONFIG_KASAN
+ case POPULATE_KASAN_MAP_SHADOW:
+ addr = physmem_alloc_top_down(RR_VMEM, size, size);
+ memset((void *)addr, 0, size);
+ return addr;
+#endif
default:
return -1;
}
@@ -126,23 +271,28 @@ static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
- unsigned long next;
+ unsigned long pages = 0;
pte_t *pte, entry;
pte = pte_offset_kernel(pmd, addr);
for (; addr < end; addr += PAGE_SIZE, pte++) {
if (pte_none(*pte)) {
- entry = __pte(_pa(addr, mode));
+ if (kasan_pte_populate_zero_shadow(pte, mode))
+ continue;
+ entry = __pte(_pa(addr, PAGE_SIZE, mode));
entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
set_pte(pte, entry);
+ pages++;
}
}
+ if (mode == POPULATE_DIRECT)
+ update_page_count(PG_DIRECT_MAP_4K, pages);
}
static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
- unsigned long next;
+ unsigned long next, pages = 0;
pmd_t *pmd, entry;
pte_t *pte;
@@ -150,10 +300,13 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
for (; addr < end; addr = next, pmd++) {
next = pmd_addr_end(addr, end);
if (pmd_none(*pmd)) {
+ if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
+ continue;
if (can_large_pmd(pmd, addr, next)) {
- entry = __pmd(_pa(addr, mode));
+ entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
set_pmd(pmd, entry);
+ pages++;
continue;
}
pte = boot_pte_alloc();
@@ -163,12 +316,14 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
}
pgtable_pte_populate(pmd, addr, next, mode);
}
+ if (mode == POPULATE_DIRECT)
+ update_page_count(PG_DIRECT_MAP_1M, pages);
}
static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
- unsigned long next;
+ unsigned long next, pages = 0;
pud_t *pud, entry;
pmd_t *pmd;
@@ -176,10 +331,13 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
for (; addr < end; addr = next, pud++) {
next = pud_addr_end(addr, end);
if (pud_none(*pud)) {
+ if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
+ continue;
if (can_large_pud(pud, addr, next)) {
- entry = __pud(_pa(addr, mode));
+ entry = __pud(_pa(addr, _REGION3_SIZE, mode));
entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
set_pud(pud, entry);
+ pages++;
continue;
}
pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
@@ -189,6 +347,8 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
}
pgtable_pmd_populate(pud, addr, next, mode);
}
+ if (mode == POPULATE_DIRECT)
+ update_page_count(PG_DIRECT_MAP_2G, pages);
}
static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
@@ -202,6 +362,8 @@ static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long e
for (; addr < end; addr = next, p4d++) {
next = p4d_addr_end(addr, end);
if (p4d_none(*p4d)) {
+ if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode))
+ continue;
pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
p4d_populate(&init_mm, p4d, pud);
}
@@ -219,9 +381,15 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
for (; addr < end; addr = next, pgd++) {
next = pgd_addr_end(addr, end);
if (pgd_none(*pgd)) {
+ if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode))
+ continue;
p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
pgd_populate(&init_mm, pgd, p4d);
}
+#ifdef CONFIG_KASAN
+ if (mode == POPULATE_KASAN_SHALLOW)
+ continue;
+#endif
pgtable_p4d_populate(pgd, addr, next, mode);
}
}
@@ -250,16 +418,17 @@ void setup_vmem(unsigned long asce_limit)
* To prevent creation of a large page at address 0 first map
* the lowcore and create the identity mapping only afterwards.
*/
- pgtable_populate_init();
- pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE);
- for_each_mem_detect_usable_block(i, &start, &end)
- pgtable_populate(start, end, POPULATE_ONE2ONE);
+ pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT);
+ for_each_physmem_usable_range(i, &start, &end)
+ pgtable_populate(start, end, POPULATE_DIRECT);
pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
POPULATE_ABS_LOWCORE);
pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
POPULATE_NONE);
memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area);
+ kasan_populate_shadow();
+
S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits;
S390_lowcore.user_asce = s390_invalid_asce;
@@ -269,10 +438,3 @@ void setup_vmem(unsigned long asce_limit)
init_mm.context.asce = S390_lowcore.kernel_asce;
}
-
-unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total)
-{
- unsigned long pages = DIV_ROUND_UP(online_mem_total, PAGE_SIZE);
-
- return DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2;
-}
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index fa9d33b01b85..389df0e0d9e5 100644
--- a/arch/s390/boot/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -93,6 +93,8 @@ SECTIONS
_decompressor_syms_end = .;
}
+ _decompressor_end = .;
+
#ifdef CONFIG_KERNEL_UNCOMPRESSED
. = 0x100000;
#else