diff options
Diffstat (limited to 'arch/s390/boot')
-rw-r--r-- | arch/s390/boot/Makefile | 2 | ||||
-rw-r--r-- | arch/s390/boot/boot.h | 40 | ||||
-rw-r--r-- | arch/s390/boot/decompressor.c | 3 | ||||
-rw-r--r-- | arch/s390/boot/decompressor.h | 26 | ||||
-rw-r--r-- | arch/s390/boot/kaslr.c | 20 | ||||
-rw-r--r-- | arch/s390/boot/mem_detect.c | 72 | ||||
-rw-r--r-- | arch/s390/boot/startup.c | 86 | ||||
-rw-r--r-- | arch/s390/boot/vmem.c | 278 |
8 files changed, 431 insertions, 96 deletions
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index d52c3e2e16bc..47a397da0498 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -35,7 +35,7 @@ endif CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char -obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o +obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o vmem.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 70418389414d..58ce701d6110 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -8,10 +8,36 @@ #ifndef __ASSEMBLY__ +struct machine_info { + unsigned char has_edat1 : 1; + unsigned char has_edat2 : 1; + unsigned char has_nx : 1; +}; + +struct vmlinux_info { + unsigned long default_lma; + unsigned long entry; + unsigned long image_size; /* does not include .bss */ + unsigned long bss_size; /* uncompressed image .bss size */ + unsigned long bootdata_off; + unsigned long bootdata_size; + unsigned long bootdata_preserved_off; + unsigned long bootdata_preserved_size; + unsigned long dynsym_start; + unsigned long rela_dyn_start; + unsigned long rela_dyn_end; + unsigned long amode31_size; + unsigned long init_mm_off; + unsigned long swapper_pg_dir_off; + unsigned long invalid_pg_dir_off; +}; + void startup_kernel(void); -unsigned long detect_memory(void); +unsigned long detect_memory(unsigned long *safe_addr); +void mem_detect_set_usable_limit(unsigned long limit); bool is_ipl_block_dump(void); void store_ipl_parmblock(void); +unsigned long read_ipl_report(unsigned long safe_addr); void setup_boot_command_line(void); void parse_boot_command_line(void); void verify_facilities(void); @@ -19,7 +45,12 @@ void print_missing_facilities(void); void sclp_early_setup_buffer(void); void print_pgm_check_info(void); unsigned long get_random_base(unsigned long safe_addr); +void setup_vmem(unsigned long asce_limit); +unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total); void __printf(1, 2) decompressor_printk(const char *fmt, ...); +void error(char *m); + +extern struct machine_info machine; /* Symbols defined by linker scripts */ extern const char kernel_version[]; @@ -31,8 +62,13 @@ extern char __boot_data_start[], __boot_data_end[]; extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; extern char _decompressor_syms_start[], _decompressor_syms_end[]; extern char _stack_start[], _stack_end[]; +extern char _end[]; +extern unsigned char _compressed_start[]; +extern unsigned char _compressed_end[]; +extern struct vmlinux_info _vmlinux_info; +#define vmlinux _vmlinux_info -unsigned long read_ipl_report(unsigned long safe_offset); +#define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore)) #endif /* __ASSEMBLY__ */ #endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c index 8dcd7af2911a..d762733a0753 100644 --- a/arch/s390/boot/decompressor.c +++ b/arch/s390/boot/decompressor.c @@ -11,6 +11,7 @@ #include <linux/string.h> #include <asm/page.h> #include "decompressor.h" +#include "boot.h" /* * gzip declarations @@ -80,6 +81,6 @@ void *decompress_kernel(void) void *output = (void *)decompress_offset; __decompress(_compressed_start, _compressed_end - _compressed_start, - NULL, NULL, output, 0, NULL, error); + NULL, NULL, output, vmlinux.image_size, NULL, error); return output; } diff --git a/arch/s390/boot/decompressor.h b/arch/s390/boot/decompressor.h index f75cc31a77dd..92b81d2ea35d 100644 --- a/arch/s390/boot/decompressor.h +++ b/arch/s390/boot/decompressor.h @@ -2,37 +2,11 @@ #ifndef BOOT_COMPRESSED_DECOMPRESSOR_H #define BOOT_COMPRESSED_DECOMPRESSOR_H -#include <linux/stddef.h> - #ifdef CONFIG_KERNEL_UNCOMPRESSED static inline void *decompress_kernel(void) { return NULL; } #else void *decompress_kernel(void); #endif unsigned long mem_safe_offset(void); -void error(char *m); - -struct vmlinux_info { - unsigned long default_lma; - void (*entry)(void); - unsigned long image_size; /* does not include .bss */ - unsigned long bss_size; /* uncompressed image .bss size */ - unsigned long bootdata_off; - unsigned long bootdata_size; - unsigned long bootdata_preserved_off; - unsigned long bootdata_preserved_size; - unsigned long dynsym_start; - unsigned long rela_dyn_start; - unsigned long rela_dyn_end; - unsigned long amode31_size; -}; - -/* Symbols defined by linker scripts */ -extern char _end[]; -extern unsigned char _compressed_start[]; -extern unsigned char _compressed_end[]; -extern char _vmlinux_info[]; - -#define vmlinux (*(struct vmlinux_info *)_vmlinux_info) #endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */ diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index e8d74d4f62aa..3e3d846400b4 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -132,7 +132,7 @@ static unsigned long count_valid_kernel_positions(unsigned long kernel_size, unsigned long start, end, pos = 0; int i; - for_each_mem_detect_block(i, &start, &end) { + for_each_mem_detect_usable_block(i, &start, &end) { if (_min >= end) continue; if (start >= _max) @@ -153,7 +153,7 @@ static unsigned long position_to_address(unsigned long pos, unsigned long kernel unsigned long start, end; int i; - for_each_mem_detect_block(i, &start, &end) { + for_each_mem_detect_usable_block(i, &start, &end) { if (_min >= end) continue; if (start >= _max) @@ -172,26 +172,20 @@ static unsigned long position_to_address(unsigned long pos, unsigned long kernel unsigned long get_random_base(unsigned long safe_addr) { + unsigned long usable_total = get_mem_detect_usable_total(); unsigned long memory_limit = get_mem_detect_end(); unsigned long base_pos, max_pos, kernel_size; - unsigned long kasan_needs; int i; - memory_limit = min(memory_limit, ident_map_size); - /* * Avoid putting kernel in the end of physical memory - * which kasan will use for shadow memory and early pgtable - * mapping allocations. + * which vmem and kasan code will use for shadow memory and + * pgtable mapping allocations. */ - memory_limit -= kasan_estimate_memory_needs(memory_limit); + memory_limit -= kasan_estimate_memory_needs(usable_total); + memory_limit -= vmem_estimate_memory_needs(usable_total); - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size) { - if (safe_addr < initrd_data.start + initrd_data.size) - safe_addr = initrd_data.start + initrd_data.size; - } safe_addr = ALIGN(safe_addr, THREAD_SIZE); - kernel_size = vmlinux.image_size + vmlinux.bss_size; if (safe_addr + kernel_size > memory_limit) return 0; diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 7fa1a32ea0f3..35f4ba11f7fd 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -16,29 +16,10 @@ struct mem_detect_info __bootdata(mem_detect); #define ENTRIES_EXTENDED_MAX \ (256 * (1020 / 2) * sizeof(struct mem_detect_block)) -/* - * To avoid corrupting old kernel memory during dump, find lowest memory - * chunk possible either right after the kernel end (decompressed kernel) or - * after initrd (if it is present and there is no hole between the kernel end - * and initrd) - */ -static void *mem_detect_alloc_extended(void) -{ - unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64)); - - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size && - initrd_data.start < offset + ENTRIES_EXTENDED_MAX) - offset = ALIGN(initrd_data.start + initrd_data.size, sizeof(u64)); - - return (void *)offset; -} - static struct mem_detect_block *__get_mem_detect_block_ptr(u32 n) { if (n < MEM_INLINED_ENTRIES) return &mem_detect.entries[n]; - if (unlikely(!mem_detect.entries_extended)) - mem_detect.entries_extended = mem_detect_alloc_extended(); return &mem_detect.entries_extended[n - MEM_INLINED_ENTRIES]; } @@ -147,7 +128,7 @@ static int tprot(unsigned long addr) return rc; } -static void search_mem_end(void) +static unsigned long search_mem_end(void) { unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */ unsigned long offset = 0; @@ -159,33 +140,52 @@ static void search_mem_end(void) if (!tprot(pivot << 20)) offset = pivot; } - - add_mem_detect_block(0, (offset + 1) << 20); + return (offset + 1) << 20; } -unsigned long detect_memory(void) +unsigned long detect_memory(unsigned long *safe_addr) { - unsigned long max_physmem_end; + unsigned long max_physmem_end = 0; sclp_early_get_memsize(&max_physmem_end); + mem_detect.entries_extended = (struct mem_detect_block *)ALIGN(*safe_addr, sizeof(u64)); if (!sclp_early_read_storage_info()) { mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO; - return max_physmem_end; - } - - if (!diag260()) { + } else if (!diag260()) { mem_detect.info_source = MEM_DETECT_DIAG260; - return max_physmem_end; - } - - if (max_physmem_end) { + max_physmem_end = max_physmem_end ?: get_mem_detect_end(); + } else if (max_physmem_end) { add_mem_detect_block(0, max_physmem_end); mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO; - return max_physmem_end; + } else { + max_physmem_end = search_mem_end(); + add_mem_detect_block(0, max_physmem_end); + mem_detect.info_source = MEM_DETECT_BIN_SEARCH; } - search_mem_end(); - mem_detect.info_source = MEM_DETECT_BIN_SEARCH; - return get_mem_detect_end(); + if (mem_detect.count > MEM_INLINED_ENTRIES) { + *safe_addr += (mem_detect.count - MEM_INLINED_ENTRIES) * + sizeof(struct mem_detect_block); + } + + return max_physmem_end; +} + +void mem_detect_set_usable_limit(unsigned long limit) +{ + struct mem_detect_block *block; + int i; + + /* make sure mem_detect.usable ends up within online memory block */ + for (i = 0; i < mem_detect.count; i++) { + block = __get_mem_detect_block_ptr(i); + if (block->start >= limit) + break; + if (block->end >= limit) { + mem_detect.usable = limit; + break; + } + mem_detect.usable = block->end; + } } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 47ca3264c023..11413f0baabc 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -3,6 +3,7 @@ #include <linux/elf.h> #include <asm/boot_data.h> #include <asm/sections.h> +#include <asm/maccess.h> #include <asm/cpu_mf.h> #include <asm/setup.h> #include <asm/kasan.h> @@ -11,6 +12,7 @@ #include <asm/diag.h> #include <asm/uv.h> #include <asm/abs_lowcore.h> +#include <asm/mem_detect.h> #include "decompressor.h" #include "boot.h" #include "uv.h" @@ -18,6 +20,7 @@ unsigned long __bootdata_preserved(__kaslr_offset); unsigned long __bootdata_preserved(__abs_lowcore); unsigned long __bootdata_preserved(__memcpy_real_area); +pte_t *__bootdata_preserved(memcpy_real_ptep); unsigned long __bootdata(__amode31_base); unsigned long __bootdata_preserved(VMALLOC_START); unsigned long __bootdata_preserved(VMALLOC_END); @@ -33,6 +36,8 @@ u64 __bootdata_preserved(stfle_fac_list[16]); u64 __bootdata_preserved(alt_stfle_fac_list[16]); struct oldmem_data __bootdata_preserved(oldmem_data); +struct machine_info machine; + void error(char *x) { sclp_early_printk("\n\n"); @@ -42,6 +47,20 @@ void error(char *x) disabled_wait(); } +static void detect_facilities(void) +{ + if (test_facility(8)) { + machine.has_edat1 = 1; + __ctl_set_bit(0, 23); + } + if (test_facility(78)) + machine.has_edat2 = 1; + if (!noexec_disabled && test_facility(130)) { + machine.has_nx = 1; + __ctl_set_bit(0, 20); + } +} + static void setup_lpp(void) { S390_lowcore.current_pid = 0; @@ -57,16 +76,17 @@ unsigned long mem_safe_offset(void) } #endif -static void rescue_initrd(unsigned long addr) +static unsigned long rescue_initrd(unsigned long safe_addr) { if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD)) - return; + return safe_addr; if (!initrd_data.start || !initrd_data.size) - return; - if (addr <= initrd_data.start) - return; - memmove((void *)addr, (void *)initrd_data.start, initrd_data.size); - initrd_data.start = addr; + return safe_addr; + if (initrd_data.start < safe_addr) { + memmove((void *)safe_addr, (void *)initrd_data.start, initrd_data.size); + initrd_data.start = safe_addr; + } + return initrd_data.start + initrd_data.size; } static void copy_bootdata(void) @@ -150,9 +170,10 @@ static void setup_ident_map_size(unsigned long max_physmem_end) #endif } -static void setup_kernel_memory_layout(void) +static unsigned long setup_kernel_memory_layout(void) { unsigned long vmemmap_start; + unsigned long asce_limit; unsigned long rte_size; unsigned long pages; unsigned long vmax; @@ -167,10 +188,10 @@ static void setup_kernel_memory_layout(void) vmalloc_size > _REGION2_SIZE || vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN > _REGION2_SIZE) { - vmax = _REGION1_SIZE; + asce_limit = _REGION1_SIZE; rte_size = _REGION2_SIZE; } else { - vmax = _REGION2_SIZE; + asce_limit = _REGION2_SIZE; rte_size = _REGION3_SIZE; } /* @@ -178,7 +199,7 @@ static void setup_kernel_memory_layout(void) * secure storage limit, so that any vmalloc allocation * we do could be used to back secure guest storage. */ - vmax = adjust_to_uv_max(vmax); + vmax = adjust_to_uv_max(asce_limit); #ifdef CONFIG_KASAN /* force vmalloc and modules below kasan shadow */ vmax = min(vmax, KASAN_SHADOW_START); @@ -207,6 +228,8 @@ static void setup_kernel_memory_layout(void) /* make sure vmemmap doesn't overlay with vmalloc area */ VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START); vmemmap = (struct page *)vmemmap_start; + + return asce_limit; } /* @@ -240,19 +263,25 @@ static void offset_vmlinux_info(unsigned long offset) vmlinux.rela_dyn_start += offset; vmlinux.rela_dyn_end += offset; vmlinux.dynsym_start += offset; + vmlinux.init_mm_off += offset; + vmlinux.swapper_pg_dir_off += offset; + vmlinux.invalid_pg_dir_off += offset; } static unsigned long reserve_amode31(unsigned long safe_addr) { __amode31_base = PAGE_ALIGN(safe_addr); - return safe_addr + vmlinux.amode31_size; + return __amode31_base + vmlinux.amode31_size; } void startup_kernel(void) { + unsigned long max_physmem_end; unsigned long random_lma; unsigned long safe_addr; + unsigned long asce_limit; void *img; + psw_t psw; initrd_data.start = parmarea.initrd_start; initrd_data.size = parmarea.initrd_size; @@ -265,14 +294,17 @@ void startup_kernel(void) safe_addr = reserve_amode31(safe_addr); safe_addr = read_ipl_report(safe_addr); uv_query_info(); - rescue_initrd(safe_addr); + safe_addr = rescue_initrd(safe_addr); sclp_early_read_info(); setup_boot_command_line(); parse_boot_command_line(); + detect_facilities(); sanitize_prot_virt_host(); - setup_ident_map_size(detect_memory()); + max_physmem_end = detect_memory(&safe_addr); + setup_ident_map_size(max_physmem_end); setup_vmalloc_size(); - setup_kernel_memory_layout(); + asce_limit = setup_kernel_memory_layout(); + mem_detect_set_usable_limit(ident_map_size); if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { random_lma = get_random_base(safe_addr); @@ -289,9 +321,23 @@ void startup_kernel(void) } else if (__kaslr_offset) memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size); + /* + * The order of the following operations is important: + * + * - handle_relocs() must follow clear_bss_section() to establish static + * memory references to data in .bss to be used by setup_vmem() + * (i.e init_mm.pgd) + * + * - setup_vmem() must follow handle_relocs() to be able using + * static memory references to data in .bss (i.e init_mm.pgd) + * + * - copy_bootdata() must follow setup_vmem() to propagate changes to + * bootdata made by setup_vmem() + */ clear_bss_section(); - copy_bootdata(); handle_relocs(__kaslr_offset); + setup_vmem(asce_limit); + copy_bootdata(); if (__kaslr_offset) { /* @@ -303,5 +349,11 @@ void startup_kernel(void) if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) memset(img, 0, vmlinux.image_size); } - vmlinux.entry(); + + /* + * Jump to the decompressed kernel entry point and switch DAT mode on. + */ + psw.addr = vmlinux.entry; + psw.mask = PSW_KERNEL_BITS; + __load_psw(psw); } diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c new file mode 100644 index 000000000000..4d1d0d8e99cb --- /dev/null +++ b/arch/s390/boot/vmem.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/sched/task.h> +#include <linux/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/facility.h> +#include <asm/sections.h> +#include <asm/mem_detect.h> +#include <asm/maccess.h> +#include <asm/abs_lowcore.h> +#include "decompressor.h" +#include "boot.h" + +#define init_mm (*(struct mm_struct *)vmlinux.init_mm_off) +#define swapper_pg_dir vmlinux.swapper_pg_dir_off +#define invalid_pg_dir vmlinux.invalid_pg_dir_off + +/* + * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though. + */ +static inline pte_t *__virt_to_kpte(unsigned long va) +{ + return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va); +} + +unsigned long __bootdata_preserved(s390_invalid_asce); +unsigned long __bootdata(pgalloc_pos); +unsigned long __bootdata(pgalloc_end); +unsigned long __bootdata(pgalloc_low); + +enum populate_mode { + POPULATE_NONE, + POPULATE_ONE2ONE, + POPULATE_ABS_LOWCORE, +}; + +static void boot_check_oom(void) +{ + if (pgalloc_pos < pgalloc_low) + error("out of memory on boot\n"); +} + +static void pgtable_populate_init(void) +{ + unsigned long initrd_end; + unsigned long kernel_end; + + kernel_end = vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size; + pgalloc_low = round_up(kernel_end, PAGE_SIZE); + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) { + initrd_end = round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE); + pgalloc_low = max(pgalloc_low, initrd_end); + } + + pgalloc_end = round_down(get_mem_detect_end(), PAGE_SIZE); + pgalloc_pos = pgalloc_end; + + boot_check_oom(); +} + +static void *boot_alloc_pages(unsigned int order) +{ + unsigned long size = PAGE_SIZE << order; + + pgalloc_pos -= size; + pgalloc_pos = round_down(pgalloc_pos, size); + + boot_check_oom(); + + return (void *)pgalloc_pos; +} + +static void *boot_crst_alloc(unsigned long val) +{ + unsigned long *table; + + table = boot_alloc_pages(CRST_ALLOC_ORDER); + if (table) + crst_table_init(table, val); + return table; +} + +static pte_t *boot_pte_alloc(void) +{ + static void *pte_leftover; + pte_t *pte; + + BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE); + + if (!pte_leftover) { + pte_leftover = boot_alloc_pages(0); + pte = pte_leftover + _PAGE_TABLE_SIZE; + } else { + pte = pte_leftover; + pte_leftover = NULL; + } + memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); + return pte; +} + +static unsigned long _pa(unsigned long addr, enum populate_mode mode) +{ + switch (mode) { + case POPULATE_NONE: + return -1; + case POPULATE_ONE2ONE: + return addr; + case POPULATE_ABS_LOWCORE: + return __abs_lowcore_pa(addr); + default: + return -1; + } +} + +static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end) +{ + return machine.has_edat2 && + IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE; +} + +static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end) +{ + return machine.has_edat1 && + IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE; +} + +static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, + enum populate_mode mode) +{ + unsigned long next; + pte_t *pte, entry; + + pte = pte_offset_kernel(pmd, addr); + for (; addr < end; addr += PAGE_SIZE, pte++) { + if (pte_none(*pte)) { + entry = __pte(_pa(addr, mode)); + entry = set_pte_bit(entry, PAGE_KERNEL_EXEC); + set_pte(pte, entry); + } + } +} + +static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end, + enum populate_mode mode) +{ + unsigned long next; + pmd_t *pmd, entry; + pte_t *pte; + + pmd = pmd_offset(pud, addr); + for (; addr < end; addr = next, pmd++) { + next = pmd_addr_end(addr, end); + if (pmd_none(*pmd)) { + if (can_large_pmd(pmd, addr, next)) { + entry = __pmd(_pa(addr, mode)); + entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC); + set_pmd(pmd, entry); + continue; + } + pte = boot_pte_alloc(); + pmd_populate(&init_mm, pmd, pte); + } else if (pmd_large(*pmd)) { + continue; + } + pgtable_pte_populate(pmd, addr, next, mode); + } +} + +static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end, + enum populate_mode mode) +{ + unsigned long next; + pud_t *pud, entry; + pmd_t *pmd; + + pud = pud_offset(p4d, addr); + for (; addr < end; addr = next, pud++) { + next = pud_addr_end(addr, end); + if (pud_none(*pud)) { + if (can_large_pud(pud, addr, next)) { + entry = __pud(_pa(addr, mode)); + entry = set_pud_bit(entry, REGION3_KERNEL_EXEC); + set_pud(pud, entry); + continue; + } + pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY); + pud_populate(&init_mm, pud, pmd); + } else if (pud_large(*pud)) { + continue; + } + pgtable_pmd_populate(pud, addr, next, mode); + } +} + +static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end, + enum populate_mode mode) +{ + unsigned long next; + p4d_t *p4d; + pud_t *pud; + + p4d = p4d_offset(pgd, addr); + for (; addr < end; addr = next, p4d++) { + next = p4d_addr_end(addr, end); + if (p4d_none(*p4d)) { + pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY); + p4d_populate(&init_mm, p4d, pud); + } + pgtable_pud_populate(p4d, addr, next, mode); + } +} + +static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode) +{ + unsigned long next; + pgd_t *pgd; + p4d_t *p4d; + + pgd = pgd_offset(&init_mm, addr); + for (; addr < end; addr = next, pgd++) { + next = pgd_addr_end(addr, end); + if (pgd_none(*pgd)) { + p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY); + pgd_populate(&init_mm, pgd, p4d); + } + pgtable_p4d_populate(pgd, addr, next, mode); + } +} + +void setup_vmem(unsigned long asce_limit) +{ + unsigned long start, end; + unsigned long asce_type; + unsigned long asce_bits; + int i; + + if (asce_limit == _REGION1_SIZE) { + asce_type = _REGION2_ENTRY_EMPTY; + asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; + } else { + asce_type = _REGION3_ENTRY_EMPTY; + asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + } + s390_invalid_asce = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + + crst_table_init((unsigned long *)swapper_pg_dir, asce_type); + crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY); + + /* + * To allow prefixing the lowcore must be mapped with 4KB pages. + * To prevent creation of a large page at address 0 first map + * the lowcore and create the identity mapping only afterwards. + */ + pgtable_populate_init(); + pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE); + for_each_mem_detect_usable_block(i, &start, &end) + pgtable_populate(start, end, POPULATE_ONE2ONE); + pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore), + POPULATE_ABS_LOWCORE); + pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE, + POPULATE_NONE); + memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area); + + S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits; + S390_lowcore.user_asce = s390_invalid_asce; + + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + __ctl_load(S390_lowcore.user_asce, 7, 7); + __ctl_load(S390_lowcore.kernel_asce, 13, 13); + + init_mm.context.asce = S390_lowcore.kernel_asce; +} + +unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total) +{ + unsigned long pages = DIV_ROUND_UP(online_mem_total, PAGE_SIZE); + + return DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2; +} |