summaryrefslogtreecommitdiff
path: root/arch/s390/kernel
diff options
context:
space:
mode:
authorAlexander Gordeev <agordeev@linux.ibm.com>2022-12-13 11:35:11 +0100
committerHeiko Carstens <hca@linux.ibm.com>2023-01-13 14:15:05 +0100
commitbb1520d581a3a46e2d6e12bb74604ace33404de5 (patch)
treee30707db7e1375a9b4bb8bd40102e57aee8c968a /arch/s390/kernel
parentbd50b7436217b4123911c2bca1efd74718654f06 (diff)
s390/mm: start kernel with DAT enabled
The setup of the kernel virtual address space is spread throughout the sources, boot stages and config options like this: 1. The available physical memory regions are queried and stored as mem_detect information for later use in the decompressor. 2. Based on the physical memory availability the virtual memory layout is established in the decompressor; 3. If CONFIG_KASAN is disabled the kernel paging setup code populates kernel pgtables and turns DAT mode on. It uses the information stored at step [1]. 4. If CONFIG_KASAN is enabled the kernel early boot kasan setup populates kernel pgtables and turns DAT mode on. It uses the information stored at step [1]. The kasan setup creates early_pg_dir directory and directly overwrites swapper_pg_dir entries to make shadow memory pages available. Move the kernel virtual memory setup to the decompressor and start the kernel with DAT turned on right from the very first istruction. That completely eliminates the boot phase when the kernel runs in DAT-off mode, simplies the overall design and consolidates pgtables setup. The identity mapping is created in the decompressor, while kasan shadow mappings are still created by the early boot kernel code. Share with decompressor the existing kasan memory allocator. It decreases the size of a newly requested memory block from pgalloc_pos and ensures that kernel image is not overwritten. pgalloc_low and pgalloc_pos pointers are made preserved boot variables for that. Use the bootdata infrastructure to setup swapper_pg_dir and invalid_pg_dir directories used by the kernel later. The interim early_pg_dir directory established by the kasan initialization code gets eliminated as result. As the kernel runs in DAT-on mode only the PSW_KERNEL_BITS define gets PSW_MASK_DAT bit by default. Additionally, the setup_lowcore_dat_off() and setup_lowcore_dat_on() routines get merged, since there is no DAT-off mode stage anymore. The memory mappings are created with RW+X protection that allows the early boot code setting up all necessary data and services for the kernel being booted. Just before the paging is enabled the memory protection is changed to RO+X for text, RO+NX for read-only data and RW+NX for kernel data and the identity mapping. Reviewed-by: Heiko Carstens <hca@linux.ibm.com> Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Diffstat (limited to 'arch/s390/kernel')
-rw-r--r--arch/s390/kernel/early.c5
-rw-r--r--arch/s390/kernel/idle.c4
-rw-r--r--arch/s390/kernel/process.c4
-rw-r--r--arch/s390/kernel/setup.c84
-rw-r--r--arch/s390/kernel/smp.c4
-rw-r--r--arch/s390/kernel/vmlinux.lds.S3
6 files changed, 49 insertions, 55 deletions
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 9693c8630e73..9cfd9f4fc927 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -18,6 +18,7 @@
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <asm/asm-extable.h>
+#include <linux/memblock.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/ipl.h>
@@ -160,9 +161,7 @@ static noinline __init void setup_lowcore_early(void)
psw_t psw;
psw.addr = (unsigned long)early_pgm_check_handler;
- psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
- if (IS_ENABLED(CONFIG_KASAN))
- psw.mask |= PSW_MASK_DAT;
+ psw.mask = PSW_KERNEL_BITS;
S390_lowcore.program_new_psw = psw;
S390_lowcore.preempt_count = INIT_PREEMPT_COUNT;
}
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 4bf1ee293f2b..a8aebb5c95cf 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -51,8 +51,8 @@ void arch_cpu_idle(void)
unsigned long psw_mask;
/* Wait for external, I/O or machine check interrupt. */
- psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
clear_cpu_flag(CIF_NOHZ_DELAY);
/* psw_idle() returns with interrupts disabled. */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 3f5d2db0b854..67df64ef4839 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -147,8 +147,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (unlikely(args->fn)) {
/* kernel thread */
memset(&frame->childregs, 0, sizeof(struct pt_regs));
- frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO |
+ PSW_MASK_EXT | PSW_MASK_MCHECK;
frame->childregs.psw.addr =
(unsigned long)__ret_from_fork;
frame->childregs.gprs[9] = (unsigned long)args->fn;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 2b6091349daa..1ffaa85cd518 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -149,6 +149,9 @@ int __bootdata(noexec_disabled);
unsigned long __bootdata(ident_map_size);
struct mem_detect_info __bootdata(mem_detect);
struct initrd_data __bootdata(initrd_data);
+unsigned long __bootdata(pgalloc_pos);
+unsigned long __bootdata(pgalloc_end);
+unsigned long __bootdata(pgalloc_low);
unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata(__amode31_base);
@@ -411,16 +414,12 @@ void __init arch_call_rest_init(void)
call_on_stack_noreturn(rest_init, stack);
}
-static void __init setup_lowcore_dat_off(void)
+static void __init setup_lowcore(void)
{
- unsigned long int_psw_mask = PSW_KERNEL_BITS;
- struct lowcore *abs_lc, *lc;
+ struct lowcore *lc, *abs_lc;
unsigned long mcck_stack;
unsigned long flags;
- if (IS_ENABLED(CONFIG_KASAN))
- int_psw_mask |= PSW_MASK_DAT;
-
/*
* Setup lowcore for boot cpu
*/
@@ -430,17 +429,17 @@ static void __init setup_lowcore_dat_off(void)
panic("%s: Failed to allocate %zu bytes align=%zx\n",
__func__, sizeof(*lc), sizeof(*lc));
- lc->restart_psw.mask = PSW_KERNEL_BITS;
- lc->restart_psw.addr = (unsigned long) restart_int_handler;
- lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
+ lc->restart_psw.addr = __pa(restart_int_handler);
+ lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->external_new_psw.addr = (unsigned long) ext_int_handler;
- lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->svc_new_psw.addr = (unsigned long) system_call;
- lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
- lc->mcck_new_psw.mask = int_psw_mask;
+ lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
- lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->io_new_psw.addr = (unsigned long) io_int_handler;
lc->clock_comparator = clock_comparator_max;
lc->nodat_stack = ((unsigned long) &init_thread_union)
@@ -477,15 +476,7 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_fn = (unsigned long) do_restart;
lc->restart_data = 0;
lc->restart_source = -1U;
-
- abs_lc = get_abs_lowcore(&flags);
- abs_lc->restart_stack = lc->restart_stack;
- abs_lc->restart_fn = lc->restart_fn;
- abs_lc->restart_data = lc->restart_data;
- abs_lc->restart_source = lc->restart_source;
- abs_lc->restart_psw = lc->restart_psw;
- abs_lc->mcesad = lc->mcesad;
- put_abs_lowcore(abs_lc, flags);
+ __ctl_store(lc->cregs_save_area, 0, 15);
mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
if (!mcck_stack)
@@ -499,33 +490,26 @@ static void __init setup_lowcore_dat_off(void)
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
lc->preempt_count = PREEMPT_DISABLED;
+ lc->kernel_asce = S390_lowcore.kernel_asce;
+ lc->user_asce = S390_lowcore.user_asce;
+
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->restart_stack = lc->restart_stack;
+ abs_lc->restart_fn = lc->restart_fn;
+ abs_lc->restart_data = lc->restart_data;
+ abs_lc->restart_source = lc->restart_source;
+ abs_lc->restart_psw = lc->restart_psw;
+ abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
+ memcpy(abs_lc->cregs_save_area, lc->cregs_save_area, sizeof(abs_lc->cregs_save_area));
+ abs_lc->program_new_psw = lc->program_new_psw;
+ abs_lc->mcesad = lc->mcesad;
+ put_abs_lowcore(abs_lc, flags);
set_prefix(__pa(lc));
lowcore_ptr[0] = lc;
-}
-
-static void __init setup_lowcore_dat_on(void)
-{
- struct lowcore *abs_lc;
- unsigned long flags;
-
- __ctl_clear_bit(0, 28);
- S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.mcck_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
- __ctl_set_bit(0, 28);
- __ctl_store(S390_lowcore.cregs_save_area, 0, 15);
if (abs_lowcore_map(0, lowcore_ptr[0], true))
panic("Couldn't setup absolute lowcore");
abs_lowcore_mapped = true;
- abs_lc = get_abs_lowcore(&flags);
- abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
- abs_lc->program_new_psw = S390_lowcore.program_new_psw;
- memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area,
- sizeof(abs_lc->cregs_save_area));
- put_abs_lowcore(abs_lc, flags);
}
static struct resource code_resource = {
@@ -650,6 +634,14 @@ static struct notifier_block kdump_mem_nb = {
#endif
/*
+ * Reserve page tables created by decompressor
+ */
+static void __init reserve_pgtables(void)
+{
+ memblock_reserve(pgalloc_pos, pgalloc_end - pgalloc_pos);
+}
+
+/*
* Reserve memory for kdump kernel to be loaded with kexec
*/
static void __init reserve_crashkernel(void)
@@ -1004,6 +996,7 @@ void __init setup_arch(char **cmdline_p)
setup_control_program_code();
/* Do some memory reservations *before* memory is added to memblock */
+ reserve_pgtables();
reserve_kernel();
reserve_initrd();
reserve_certificate_list();
@@ -1038,7 +1031,7 @@ void __init setup_arch(char **cmdline_p)
#endif
setup_resources();
- setup_lowcore_dat_off();
+ setup_lowcore();
smp_fill_possible_mask();
cpu_detect_mhz_feature();
cpu_init();
@@ -1050,7 +1043,7 @@ void __init setup_arch(char **cmdline_p)
static_branch_enable(&cpu_has_bear);
/*
- * Create kernel page tables and switch to virtual addressing.
+ * Create kernel page tables.
*/
paging_init();
memcpy_real_init();
@@ -1058,7 +1051,6 @@ void __init setup_arch(char **cmdline_p)
* After paging_init created the kernel page table, the new PSWs
* in lowcore can now run with DAT enabled.
*/
- setup_lowcore_dat_on();
#ifdef CONFIG_CRASH_DUMP
smp_save_dump_ipl_cpu();
#endif
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 0031325ce4bc..24f19f10b237 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -327,7 +327,7 @@ static void pcpu_delegate(struct pcpu *pcpu,
lc = lowcore_ptr[pcpu - pcpu_devices];
source_cpu = stap();
- __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+
if (pcpu->address == source_cpu) {
call_on_stack(2, stack, void, __pcpu_delegate,
pcpu_delegate_fn *, func, void *, data);
@@ -488,7 +488,7 @@ void smp_send_stop(void)
int cpu;
/* Disable all interrupts/machine checks */
- __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+ __load_psw_mask(PSW_KERNEL_BITS);
trace_hardirqs_off();
debug_set_critical();
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 5ea3830af0cc..a965ddc34f43 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -213,6 +213,9 @@ SECTIONS
QUAD(__rela_dyn_start) /* rela_dyn_start */
QUAD(__rela_dyn_end) /* rela_dyn_end */
QUAD(_eamode31 - _samode31) /* amode31_size */
+ QUAD(init_mm)
+ QUAD(swapper_pg_dir)
+ QUAD(invalid_pg_dir)
} :NONE
/* Debugging sections. */