author		Ard Biesheuvel <ardb@kernel.org>		2024-02-14 13:29:04 +0100
committer	Catalin Marinas <catalin.marinas@arm.com>	2024-02-16 12:42:33 +0000
commit		97a6f43bb049e64b9913c50c7530e13d78e205d4 (patch)
tree		f27badee5d092ecc09f221e6ce6dd19b83e9b44c /arch/arm64/kernel/pi
parent		82ca151da7d54d7571c5d511d016b7780d5d559f (diff)
arm64: head: Move early kernel mapping routines into C code
The asm version of the kernel mapping code works fine for creating a
coarse grained identity map, but for mapping the kernel down to its
exact boundaries with the right attributes, it is not suitable. This is
why we create a preliminary RWX kernel mapping first, and then rebuild
it from scratch later on.
So let's reimplement this in C, in a way that will make it unnecessary
to create the kernel page tables yet another time in paging_init().
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20240214122845.2033971-63-ardb+git@google.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
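The two-pass scheme described above (map everything writable first, patch the text, then seal it read-only/executable) has a familiar userspace analogue. The sketch below is illustrative only and is not kernel code: mmap()/mprotect() stand in for the two early page-table passes, and the memset() stands in for the relocation/shadow-call-stack patching step.

/*
 * Userspace analogue of the two-pass text mapping: the region starts
 * out writable so it can be patched, and is only then sealed R-X.
 * Illustrative only; hardened systems may deny PROT_EXEC here.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 4096;

	/* Pass 1: map the "text" region read-write so it can be patched */
	unsigned char *text = mmap(NULL, len, PROT_READ | PROT_WRITE,
				   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (text == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* "Patch" the code while it is still writable */
	memset(text, 0xd5, len);	/* stand-in for relocation/SCS patching */

	/* Pass 2: drop write permission and make the region executable */
	if (mprotect(text, len, PROT_READ | PROT_EXEC) != 0) {
		perror("mprotect");
		return 1;
	}

	printf("region sealed R-X at %p\n", (void *)text);
	munmap(text, len);
	return 0;
}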
Diffstat (limited to 'arch/arm64/kernel/pi')
-rw-r--r--	arch/arm64/kernel/pi/Makefile		|   1
-rw-r--r--	arch/arm64/kernel/pi/idreg-override.c	|  22
-rw-r--r--	arch/arm64/kernel/pi/kaslr_early.c	|  12
-rw-r--r--	arch/arm64/kernel/pi/map_kernel.c	| 164
-rw-r--r--	arch/arm64/kernel/pi/map_range.c	|  88
-rw-r--r--	arch/arm64/kernel/pi/patch-scs.c	|  16
-rw-r--r--	arch/arm64/kernel/pi/pi.h		|  14
-rw-r--r--	arch/arm64/kernel/pi/relocate.c		|   2
8 files changed, 289 insertions, 30 deletions
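A quick way to see what the new map_range() routine (added below) is doing with its translation-level arithmetic: each level resolves PAGE_SHIFT - 3 bits of virtual address, so a single level-N entry maps PAGE_SIZE << ((3 - N) * (PAGE_SHIFT - 3)) bytes. A standalone sketch of that calculation, assuming a 4 KiB granule; this is not part of the patch:

/*
 * Illustration of map_range()'s level arithmetic for PAGE_SHIFT == 12:
 * level 3 maps 4 KiB pages, level 2 maps 2 MiB blocks, level 1 maps
 * 1 GiB blocks, level 0 maps 512 GiB per entry.
 */
#include <stdio.h>

#define PAGE_SHIFT	12

int main(void)
{
	for (int level = 0; level <= 3; level++) {
		int lshift = (3 - level) * (PAGE_SHIFT - 3);
		unsigned long long block = (1ULL << PAGE_SHIFT) << lshift;

		printf("level %d: one entry maps %llu KiB\n", level, block >> 10);
	}
	return 0;
}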
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
index a8b302245f15..8c2f80a46b93 100644
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -39,6 +39,7 @@ $(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
 obj-y					:= idreg-override.pi.o \
+					   map_kernel.pi.o map_range.pi.o \
 					   lib-fdt.pi.o lib-fdt_ro.pi.o
 obj-$(CONFIG_RELOCATABLE)	+= relocate.pi.o
 obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr_early.pi.o
diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c
index e4bcabcc6860..1884bd936c0d 100644
--- a/arch/arm64/kernel/pi/idreg-override.c
+++ b/arch/arm64/kernel/pi/idreg-override.c
@@ -308,37 +308,35 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
 	} while (1);
 }
 
-static __init const u8 *get_bootargs_cmdline(const void *fdt)
+static __init const u8 *get_bootargs_cmdline(const void *fdt, int node)
 {
+	static char const bootargs[] __initconst = "bootargs";
 	const u8 *prop;
-	int node;
 
-	node = fdt_path_offset(fdt, "/chosen");
 	if (node < 0)
 		return NULL;
 
-	prop = fdt_getprop(fdt, node, "bootargs", NULL);
+	prop = fdt_getprop(fdt, node, bootargs, NULL);
 	if (!prop)
 		return NULL;
 
 	return strlen(prop) ? prop : NULL;
 }
 
-static __init void parse_cmdline(const void *fdt)
+static __init void parse_cmdline(const void *fdt, int chosen)
 {
-	const u8 *prop = get_bootargs_cmdline(fdt);
+	static char const cmdline[] __initconst = CONFIG_CMDLINE;
+	const u8 *prop = get_bootargs_cmdline(fdt, chosen);
 
 	if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop)
-		__parse_cmdline(CONFIG_CMDLINE, true);
+		__parse_cmdline(cmdline, true);
 
 	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop)
 		__parse_cmdline(prop, true);
 }
 
-/* Keep checkers quiet */
-void init_feature_override(u64 boot_status, const void *fdt);
-
-asmlinkage void __init init_feature_override(u64 boot_status, const void *fdt)
+void __init init_feature_override(u64 boot_status, const void *fdt,
+				  int chosen)
 {
 	struct arm64_ftr_override *override;
 	const struct ftr_set_desc *reg;
@@ -354,7 +352,7 @@ asmlinkage void __init init_feature_override(u64 boot_status, const void *fdt)
 
 	__boot_status = boot_status;
 
-	parse_cmdline(fdt);
+	parse_cmdline(fdt, chosen);
 
 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
 		reg = prel64_pointer(regs[i].reg);
diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c
index eeecee7ffd6f..0257b43819db 100644
--- a/arch/arm64/kernel/pi/kaslr_early.c
+++ b/arch/arm64/kernel/pi/kaslr_early.c
@@ -16,17 +16,17 @@
 #include <asm/memory.h>
 #include <asm/pgtable.h>
 
+#include "pi.h"
+
 extern u16 memstart_offset_seed;
 
-static u64 __init get_kaslr_seed(void *fdt)
+static u64 __init get_kaslr_seed(void *fdt, int node)
 {
-	static char const chosen_str[] __initconst = "chosen";
 	static char const seed_str[] __initconst = "kaslr-seed";
-	int node, len;
 	fdt64_t *prop;
 	u64 ret;
+	int len;
 
-	node = fdt_path_offset(fdt, chosen_str);
 	if (node < 0)
 		return 0;
 
@@ -39,14 +39,14 @@ static u64 __init get_kaslr_seed(void *fdt)
 	return ret;
 }
 
-asmlinkage u64 __init kaslr_early_init(void *fdt)
+u64 __init kaslr_early_init(void *fdt, int chosen)
 {
 	u64 seed, range;
 
 	if (kaslr_disabled_cmdline())
 		return 0;
 
-	seed = get_kaslr_seed(fdt);
+	seed = get_kaslr_seed(fdt, chosen);
 	if (!seed) {
 		if (!__early_cpu_has_rndr() ||
 		    !__arm64_rndr((unsigned long *)&seed))
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
new file mode 100644
index 000000000000..f206373b28b0
--- /dev/null
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2023 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+#include <linux/init.h>
+#include <linux/libfdt.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+#include <asm/memory.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+#include "pi.h"
+
+extern const u8 __eh_frame_start[], __eh_frame_end[];
+
+extern void idmap_cpu_replace_ttbr1(void *pgdir);
+
+static void __init map_segment(pgd_t *pg_dir, u64 *pgd, u64 va_offset,
+			       void *start, void *end, pgprot_t prot,
+			       bool may_use_cont, int root_level)
+{
+	map_range(pgd, ((u64)start + va_offset) & ~PAGE_OFFSET,
+		  ((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
+		  prot, root_level, (pte_t *)pg_dir, may_use_cont, 0);
+}
+
+static void __init unmap_segment(pgd_t *pg_dir, u64 va_offset, void *start,
+				 void *end, int root_level)
+{
+	map_segment(pg_dir, NULL, va_offset, start, end, __pgprot(0),
+		    false, root_level);
+}
+
+static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
+{
+	bool enable_scs = IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS);
+	bool twopass = IS_ENABLED(CONFIG_RELOCATABLE);
+	u64 pgdp = (u64)init_pg_dir + PAGE_SIZE;
+	pgprot_t text_prot = PAGE_KERNEL_ROX;
+	pgprot_t data_prot = PAGE_KERNEL;
+	pgprot_t prot;
+
+	/*
+	 * External debuggers may need to write directly to the text mapping to
+	 * install SW breakpoints. Allow this (only) when explicitly requested
+	 * with rodata=off.
+	 */
+	if (arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF))
+		text_prot = PAGE_KERNEL_EXEC;
+
+	/*
+	 * We only enable the shadow call stack dynamically if we are running
+	 * on a system that does not implement PAC or BTI. PAC and SCS provide
+	 * roughly the same level of protection, and BTI relies on the PACIASP
+	 * instructions serving as landing pads, preventing us from patching
+	 * those instructions into something else.
+	 */
+	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) && cpu_has_pac())
+		enable_scs = false;
+
+	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && cpu_has_bti()) {
+		enable_scs = false;
+
+		/*
+		 * If we have a CPU that supports BTI and a kernel built for
+		 * BTI then mark the kernel executable text as guarded pages
+		 * now so we don't have to rewrite the page tables later.
+		 */
+		text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
+	}
+
+	/* Map all code read-write on the first pass if needed */
+	twopass |= enable_scs;
+	prot = twopass ? data_prot : text_prot;
+
+	map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
+		    !twopass, root_level);
+	map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
+		    __inittext_begin, data_prot, false, root_level);
+	map_segment(init_pg_dir, &pgdp, va_offset, __inittext_begin,
+		    __inittext_end, prot, false, root_level);
+	map_segment(init_pg_dir, &pgdp, va_offset, __initdata_begin,
+		    __initdata_end, data_prot, false, root_level);
+	map_segment(init_pg_dir, &pgdp, va_offset, _data, _end, data_prot,
+		    true, root_level);
+	dsb(ishst);
+
+	idmap_cpu_replace_ttbr1(init_pg_dir);
+
+	if (twopass) {
+		if (IS_ENABLED(CONFIG_RELOCATABLE))
+			relocate_kernel(kaslr_offset);
+
+		if (enable_scs) {
+			scs_patch(__eh_frame_start + va_offset,
+				  __eh_frame_end - __eh_frame_start);
+			asm("ic ialluis");
+
+			dynamic_scs_is_enabled = true;
+		}
+
+		/*
+		 * Unmap the text region before remapping it, to avoid
+		 * potential TLB conflicts when creating the contiguous
+		 * descriptors.
+		 */
+		unmap_segment(init_pg_dir, va_offset, _stext, _etext,
+			      root_level);
+		dsb(ishst);
+		isb();
+		__tlbi(vmalle1);
+		isb();
+
+		/*
+		 * Remap these segments with different permissions
+		 * No new page table allocations should be needed
+		 */
+		map_segment(init_pg_dir, NULL, va_offset, _stext, _etext,
+			    text_prot, true, root_level);
+		map_segment(init_pg_dir, NULL, va_offset, __inittext_begin,
+			    __inittext_end, text_prot, false, root_level);
+		dsb(ishst);
+	}
+}
+
+asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
+{
+	static char const chosen_str[] __initconst = "/chosen";
+	u64 va_base, pa_base = (u64)&_text;
+	u64 kaslr_offset = pa_base % MIN_KIMG_ALIGN;
+	int root_level = 4 - CONFIG_PGTABLE_LEVELS;
+	int chosen;
+
+	/* Clear BSS and the initial page tables */
+	memset(__bss_start, 0, (u64)init_pg_end - (u64)__bss_start);
+
+	/* Parse the command line for CPU feature overrides */
+	chosen = fdt_path_offset(fdt, chosen_str);
+	init_feature_override(boot_status, fdt, chosen);
+
+	/*
+	 * The virtual KASLR displacement modulo 2MiB is decided by the
+	 * physical placement of the image, as otherwise, we might not be able
+	 * to create the early kernel mapping using 2 MiB block descriptors. So
+	 * take the low bits of the KASLR offset from the physical address, and
+	 * fill in the high bits from the seed.
+	 */
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+		u64 kaslr_seed = kaslr_early_init(fdt, chosen);
+
+		if (kaslr_seed && kaslr_requires_kpti())
+			arm64_use_ng_mappings = true;
+
+		kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
+	}
+
+	va_base = KIMAGE_VADDR + kaslr_offset;
+	map_kernel(kaslr_offset, va_base - pa_base, root_level);
+}
diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
new file mode 100644
index 000000000000..c31feda18f47
--- /dev/null
+++ b/arch/arm64/kernel/pi/map_range.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2023 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+#include <linux/types.h>
+#include <linux/sizes.h>
+
+#include <asm/memory.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+
+#include "pi.h"
+
+/**
+ * map_range - Map a contiguous range of physical pages into virtual memory
+ *
+ * @pte:		Address of physical pointer to array of pages to
+ *			allocate page tables from
+ * @start:		Virtual address of the start of the range
+ * @end:		Virtual address of the end of the range (exclusive)
+ * @pa:			Physical address of the start of the range
+ * @prot:		Access permissions of the range
+ * @level:		Translation level for the mapping
+ * @tbl:		The level @level page table to create the mappings in
+ * @may_use_cont:	Whether the use of the contiguous attribute is allowed
+ * @va_offset:		Offset between a physical page and its current mapping
+ *			in the VA space
+ */
+void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
+		      int level, pte_t *tbl, bool may_use_cont, u64 va_offset)
+{
+	u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
+	u64 protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
+	int lshift = (3 - level) * (PAGE_SHIFT - 3);
+	u64 lmask = (PAGE_SIZE << lshift) - 1;
+
+	start	&= PAGE_MASK;
+	pa	&= PAGE_MASK;
+
+	/* Advance tbl to the entry that covers start */
+	tbl += (start >> (lshift + PAGE_SHIFT)) % PTRS_PER_PTE;
+
+	/*
+	 * Set the right block/page bits for this level unless we are
+	 * clearing the mapping
+	 */
+	if (protval)
+		protval |= (level < 3) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
+
+	while (start < end) {
+		u64 next = min((start | lmask) + 1, PAGE_ALIGN(end));
+
+		if (level < 3 && (start | next | pa) & lmask) {
+			/*
+			 * This chunk needs a finer grained mapping. Create a
+			 * table mapping if necessary and recurse.
+			 */
+			if (pte_none(*tbl)) {
+				*tbl = __pte(__phys_to_pte_val(*pte) |
+					     PMD_TYPE_TABLE | PMD_TABLE_UXN);
+				*pte += PTRS_PER_PTE * sizeof(pte_t);
+			}
+			map_range(pte, start, next, pa, prot, level + 1,
+				  (pte_t *)(__pte_to_phys(*tbl) + va_offset),
+				  may_use_cont, va_offset);
+		} else {
+			/*
+			 * Start a contiguous range if start and pa are
+			 * suitably aligned
+			 */
+			if (((start | pa) & cmask) == 0 && may_use_cont)
+				protval |= PTE_CONT;
+
+			/*
+			 * Clear the contiguous attribute if the remaining
+			 * range does not cover a contiguous block
+			 */
+			if ((end & ~cmask) <= start)
+				protval &= ~PTE_CONT;
+
+			/* Put down a block or page mapping */
+			*tbl = __pte(__phys_to_pte_val(pa) | protval);
+		}
+		pa += next - start;
+		start = next;
+		tbl++;
+	}
+}
diff --git a/arch/arm64/kernel/pi/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c
index c65ef40d1e6b..49d8b40e61bc 100644
--- a/arch/arm64/kernel/pi/patch-scs.c
+++ b/arch/arm64/kernel/pi/patch-scs.c
@@ -11,6 +11,10 @@
 
 #include <asm/scs.h>
 
+#include "pi.h"
+
+bool dynamic_scs_is_enabled;
+
 //
 // This minimal DWARF CFI parser is partially based on the code in
 // arch/arc/kernel/unwind.c, and on the document below:
@@ -46,8 +50,6 @@
 #define DW_CFA_GNU_negative_offset_extended 0x2f
 #define DW_CFA_hi_user                      0x3f
 
-extern const u8 __eh_frame_start[], __eh_frame_end[];
-
 enum {
 	PACIASP		= 0xd503233f,
 	AUTIASP		= 0xd50323bf,
@@ -250,13 +252,3 @@ int scs_patch(const u8 eh_frame[], int size)
 	}
 	return 0;
 }
-
-asmlinkage void __init scs_patch_vmlinux(const u8 start[], const u8 end[])
-{
-	if (!should_patch_pac_into_scs())
-		return;
-
-	scs_patch(start, end - start);
-	asm("ic ialluis");
-	isb();
-}
diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
index 7c2d9bbf0ff9..d307c58e9741 100644
--- a/arch/arm64/kernel/pi/pi.h
+++ b/arch/arm64/kernel/pi/pi.h
@@ -2,6 +2,8 @@
 // Copyright 2023 Google LLC
 // Author: Ard Biesheuvel <ardb@google.com>
 
+#include <linux/types.h>
+
 #define __prel64_initconst	__section(".init.rodata.prel64")
 
 #define PREL64(type, name)	union { type *name; prel64_t name ## _prel; }
@@ -16,3 +18,15 @@ static inline void *prel64_to_pointer(const prel64_t *offset)
 		return NULL;
 	return (void *)offset + *offset;
 }
+
+extern bool dynamic_scs_is_enabled;
+
+void init_feature_override(u64 boot_status, const void *fdt, int chosen);
+u64 kaslr_early_init(void *fdt, int chosen);
+void relocate_kernel(u64 offset);
+int scs_patch(const u8 eh_frame[], int size);
+
+void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
+	       int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
+
+asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
diff --git a/arch/arm64/kernel/pi/relocate.c b/arch/arm64/kernel/pi/relocate.c
index 1853408ea76b..2407d2696398 100644
--- a/arch/arm64/kernel/pi/relocate.c
+++ b/arch/arm64/kernel/pi/relocate.c
@@ -7,6 +7,8 @@
 #include <linux/init.h>
 #include <linux/types.h>
 
+#include "pi.h"
+
 extern const Elf64_Rela rela_start[], rela_end[];
 extern const u64 relr_start[], relr_end[];
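A thread running through the idreg-override.c, kaslr_early.c and map_kernel.c changes above is that the /chosen node is now resolved once in early_map_kernel() and its offset passed to both consumers, instead of each doing its own fdt_path_offset() lookup. A minimal libfdt sketch of that calling convention; the *_stub functions are hypothetical stand-ins for the kernel routines, not the kernel code itself:

/*
 * Sketch of the "resolve /chosen once, pass the offset down" pattern.
 * Uses only standard libfdt calls; consumer bodies are illustrative.
 */
#include <libfdt.h>
#include <stdio.h>

static void init_feature_override_stub(const void *fdt, int chosen)
{
	const char *bootargs;

	if (chosen < 0)		/* no /chosen node in this DT */
		return;

	bootargs = fdt_getprop(fdt, chosen, "bootargs", NULL);
	if (bootargs)
		printf("cmdline: %s\n", bootargs);
}

static void kaslr_early_init_stub(const void *fdt, int chosen)
{
	const void *seed;
	int len;

	if (chosen < 0)
		return;

	seed = fdt_getprop(fdt, chosen, "kaslr-seed", &len);
	if (seed && len == sizeof(fdt64_t))
		printf("have kaslr-seed\n");
}

void early_setup(const void *fdt)
{
	/* Look up /chosen exactly once; a negative offset means "absent" */
	int chosen = fdt_path_offset(fdt, "/chosen");

	init_feature_override_stub(fdt, chosen);
	kaslr_early_init_stub(fdt, chosen);
}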