author     Catalin Marinas <catalin.marinas@arm.com>  2024-03-07 19:05:29 +0000
committer  Catalin Marinas <catalin.marinas@arm.com>  2024-03-07 19:05:29 +0000
commit     88f0912253ea47a2bde36e0820f0b9c025d389ad (patch)
tree       fb477daef419d79cc960de26157e5de2a6fc9b69 /arch/arm64/kernel
parent     0c5ade742e91d7bf3a508bf6223deb7410009b6d (diff)
parent     27f2b9fcddc76d542ac339febf2af55b67f610ca (diff)
Merge branch 'for-next/stage1-lpa2' into for-next/core
* for-next/stage1-lpa2: (48 commits)
  : Add support for LPA2 and WXN and stage 1
  arm64/mm: Avoid ID mapping of kpti flag if it is no longer needed
  arm64/mm: Use generic __pud_free() helper in pud_free() implementation
  arm64: gitignore: ignore relacheck
  arm64: Use Signed/Unsigned enums for TGRAN{4,16,64} and VARange
  arm64: mm: Make PUD folding check in set_pud() a runtime check
  arm64: mm: add support for WXN memory translation attribute
  mm: add arch hook to validate mmap() prot flags
  arm64: defconfig: Enable LPA2 support
  arm64: Enable 52-bit virtual addressing for 4k and 16k granule configs
  arm64: kvm: avoid CONFIG_PGTABLE_LEVELS for runtime levels
  arm64: ptdump: Deal with translation levels folded at runtime
  arm64: ptdump: Disregard unaddressable VA space
  arm64: mm: Add support for folding PUDs at runtime
  arm64: kasan: Reduce minimum shadow alignment and enable 5 level paging
  arm64: mm: Add 5 level paging support to fixmap and swapper handling
  arm64: Enable LPA2 at boot if supported by the system
  arm64: mm: add LPA2 and 5 level paging support to G-to-nG conversion
  arm64: mm: Add definitions to support 5 levels of paging
  arm64: mm: Add LPA2 support to phys<->pte conversion routines
  arm64: mm: Wire up TCR.DS bit to PTE shareability fields
  ...
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--  arch/arm64/kernel/Makefile                                                              |  13
-rw-r--r--  arch/arm64/kernel/cpufeature.c                                                          | 102
-rw-r--r--  arch/arm64/kernel/head.S                                                                | 463
-rw-r--r--  arch/arm64/kernel/image-vars.h                                                          |  35
-rw-r--r--  arch/arm64/kernel/kaslr.c                                                               |   4
-rw-r--r--  arch/arm64/kernel/module.c                                                              |   2
-rw-r--r--  arch/arm64/kernel/pi/.gitignore                                                         |   3
-rw-r--r--  arch/arm64/kernel/pi/Makefile                                                           |  27
-rw-r--r--  arch/arm64/kernel/pi/idreg-override.c (renamed from arch/arm64/kernel/idreg-override.c) |  80
-rw-r--r--  arch/arm64/kernel/pi/kaslr_early.c                                                      |  67
-rw-r--r--  arch/arm64/kernel/pi/map_kernel.c                                                       | 276
-rw-r--r--  arch/arm64/kernel/pi/map_range.c                                                        | 105
-rw-r--r--  arch/arm64/kernel/pi/patch-scs.c (renamed from arch/arm64/kernel/patch-scs.c)           |  36
-rw-r--r--  arch/arm64/kernel/pi/pi.h                                                               |  36
-rw-r--r--  arch/arm64/kernel/pi/relacheck.c                                                        | 130
-rw-r--r--  arch/arm64/kernel/pi/relocate.c                                                         |  64
-rw-r--r--  arch/arm64/kernel/setup.c                                                               |  22
-rw-r--r--  arch/arm64/kernel/sleep.S                                                               |   3
-rw-r--r--  arch/arm64/kernel/vmlinux.lds.S                                                         |  17
19 files changed, 846 insertions, 639 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 467cb7117273..14b4a179bad3 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -33,8 +33,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
- syscall.o proton-pack.o idreg-override.o idle.o \
- patching.o
+ syscall.o proton-pack.o idle.o patching.o pi/
obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
sys_compat.o
@@ -57,7 +56,7 @@ obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \
@@ -72,14 +71,6 @@ obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
obj-$(CONFIG_ARM64_MTE) += mte.o
obj-y += vdso-wrap.o
obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
-obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o
-
-# We need to prevent the SCS patching code from patching itself. Using
-# -mbranch-protection=none here to avoid the patchable PAC opcodes from being
-# generated triggers an issue with full LTO on Clang, which stops emitting PAC
-# instructions altogether. So disable LTO as well for the compilation unit.
-CFLAGS_patch-scs.o += -mbranch-protection=none
-CFLAGS_REMOVE_patch-scs.o += $(CC_FLAGS_LTO)
# Force dependency (vdso*-wrap.S includes vdso.so through incbin)
$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6e1cca7b2098..d6679d8b737e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -688,13 +688,15 @@ static const struct arm64_ftr_bits ftr_raz[] = {
#define ARM64_FTR_REG(id, table) \
__ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)
-struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
-struct arm64_ftr_override __ro_after_init id_aa64pfr0_override;
-struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
-struct arm64_ftr_override __ro_after_init id_aa64zfr0_override;
-struct arm64_ftr_override __ro_after_init id_aa64smfr0_override;
-struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
-struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
+struct arm64_ftr_override id_aa64mmfr0_override;
+struct arm64_ftr_override id_aa64mmfr1_override;
+struct arm64_ftr_override id_aa64mmfr2_override;
+struct arm64_ftr_override id_aa64pfr0_override;
+struct arm64_ftr_override id_aa64pfr1_override;
+struct arm64_ftr_override id_aa64zfr0_override;
+struct arm64_ftr_override id_aa64smfr0_override;
+struct arm64_ftr_override id_aa64isar1_override;
+struct arm64_ftr_override id_aa64isar2_override;
struct arm64_ftr_override arm64_sw_feature_override;
@@ -755,10 +757,12 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3),
/* Op1 = 0, CRn = 0, CRm = 7 */
- ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0,
+ &id_aa64mmfr0_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1,
&id_aa64mmfr1_override),
- ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2,
+ &id_aa64mmfr2_override),
ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3),
/* Op1 = 1, CRn = 0, CRm = 0 */
@@ -1669,46 +1673,6 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
return has_cpuid_feature(entry, scope);
}
-/*
- * This check is triggered during the early boot before the cpufeature
- * is initialised. Checking the status on the local CPU allows the boot
- * CPU to detect the need for non-global mappings and thus avoiding a
- * pagetable re-write after all the CPUs are booted. This check will be
- * anyway run on individual CPUs, allowing us to get the consistent
- * state once the SMP CPUs are up and thus make the switch to non-global
- * mappings if required.
- */
-bool kaslr_requires_kpti(void)
-{
- if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
- return false;
-
- /*
- * E0PD does a similar job to KPTI so can be used instead
- * where available.
- */
- if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
- u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
- if (cpuid_feature_extract_unsigned_field(mmfr2,
- ID_AA64MMFR2_EL1_E0PD_SHIFT))
- return false;
- }
-
- /*
- * Systems affected by Cavium erratum 24756 are incompatible
- * with KPTI.
- */
- if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
- extern const struct midr_range cavium_erratum_27456_cpus[];
-
- if (is_midr_in_range_list(read_cpuid_id(),
- cavium_erratum_27456_cpus))
- return false;
- }
-
- return kaslr_enabled();
-}
-
static bool __meltdown_safe = true;
static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
@@ -1761,7 +1725,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
}
/* Useful for KASLR robustness */
- if (kaslr_requires_kpti()) {
+ if (kaslr_enabled() && kaslr_requires_kpti()) {
if (!__kpti_forced) {
str = "KASLR";
__kpti_forced = 1;
@@ -1850,6 +1814,11 @@ static int __init __kpti_install_ng_mappings(void *__unused)
pgd_t *kpti_ng_temp_pgd;
u64 alloc = 0;
+ if (levels == 5 && !pgtable_l5_enabled())
+ levels = 4;
+ else if (levels == 4 && !pgtable_l4_enabled())
+ levels = 3;
+
remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
if (!cpu) {
@@ -1863,9 +1832,9 @@ static int __init __kpti_install_ng_mappings(void *__unused)
//
// The physical pages are laid out as follows:
//
- // +--------+-/-------+-/------ +-\\--------+
- // : PTE[] : | PMD[] : | PUD[] : || PGD[] :
- // +--------+-\-------+-\------ +-//--------+
+ // +--------+-/-------+-/------ +-/------ +-\\\--------+
+ // : PTE[] : | PMD[] : | PUD[] : | P4D[] : ||| PGD[] :
+ // +--------+-\-------+-\------ +-\------ +-///--------+
// ^
// The first page is mapped into this hierarchy at a PMD_SHIFT
// aligned virtual address, so that we can manipulate the PTE
@@ -2091,14 +2060,7 @@ static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
static bool hvhe_possible(const struct arm64_cpu_capabilities *entry,
int __unused)
{
- u64 val;
-
- val = read_sysreg(id_aa64mmfr1_el1);
- if (!cpuid_feature_extract_unsigned_field(val, ID_AA64MMFR1_EL1_VH_SHIFT))
- return false;
-
- val = arm64_sw_feature_override.val & arm64_sw_feature_override.mask;
- return cpuid_feature_extract_unsigned_field(val, ARM64_SW_FEATURE_OVERRIDE_HVHE);
+ return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_HVHE);
}
#ifdef CONFIG_ARM64_PAN
@@ -2796,6 +2758,24 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_enable_fpmr,
ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, FPMR, IMP)
},
+#ifdef CONFIG_ARM64_VA_BITS_52
+ {
+ .capability = ARM64_HAS_VA52,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+#ifdef CONFIG_ARM64_64K_PAGES
+ .desc = "52-bit Virtual Addressing (LVA)",
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, VARange, 52)
+#else
+ .desc = "52-bit Virtual Addressing (LPA2)",
+#ifdef CONFIG_ARM64_4K_PAGES
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, TGRAN4, 52_BIT)
+#else
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, TGRAN16, 52_BIT)
+#endif
+#endif
+ },
+#endif
{},
};
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index cab7f91949d8..405e9bce8c73 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -80,28 +80,42 @@
* x19 primary_entry() .. start_kernel() whether we entered with the MMU on
* x20 primary_entry() .. __primary_switch() CPU boot mode
* x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
- * x22 create_idmap() .. start_kernel() ID map VA of the DT blob
- * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
- * x24 __primary_switch() linear map KASLR seed
- * x25 primary_entry() .. start_kernel() supported VA size
- * x28 create_idmap() callee preserved temp register
*/
SYM_CODE_START(primary_entry)
bl record_mmu_state
bl preserve_boot_args
- bl create_idmap
+
+ adrp x1, early_init_stack
+ mov sp, x1
+ mov x29, xzr
+ adrp x0, init_idmap_pg_dir
+ mov x1, xzr
+ bl __pi_create_init_idmap
+
+ /*
+ * If the page tables have been populated with non-cacheable
+ * accesses (MMU disabled), invalidate those tables again to
+ * remove any speculatively loaded cache lines.
+ */
+ cbnz x19, 0f
+ dmb sy
+ mov x1, x0 // end of used region
+ adrp x0, init_idmap_pg_dir
+ adr_l x2, dcache_inval_poc
+ blr x2
+ b 1f
/*
* If we entered with the MMU and caches on, clean the ID mapped part
* of the primary boot code to the PoC so we can safely execute it with
* the MMU off.
*/
- cbz x19, 0f
- adrp x0, __idmap_text_start
+0: adrp x0, __idmap_text_start
adr_l x1, __idmap_text_end
adr_l x2, dcache_clean_poc
blr x2
-0: mov x0, x19
+
+1: mov x0, x19
bl init_kernel_el // w0=cpu_boot_mode
mov x20, x0
@@ -111,14 +125,6 @@ SYM_CODE_START(primary_entry)
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
-#if VA_BITS > 48
- mrs_s x0, SYS_ID_AA64MMFR2_EL1
- tst x0, ID_AA64MMFR2_EL1_VARange_MASK
- mov x0, #VA_BITS
- mov x25, #VA_BITS_MIN
- csel x25, x25, x0, eq
- mov x0, x25
-#endif
bl __cpu_setup // initialise processor
b __primary_switch
SYM_CODE_END(primary_entry)
@@ -177,267 +183,6 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
ret
SYM_CODE_END(preserve_boot_args)
-SYM_FUNC_START_LOCAL(clear_page_tables)
- /*
- * Clear the init page tables.
- */
- adrp x0, init_pg_dir
- adrp x1, init_pg_end
- sub x2, x1, x0
- mov x1, xzr
- b __pi_memset // tail call
-SYM_FUNC_END(clear_page_tables)
-
-/*
- * Macro to populate page table entries, these entries can be pointers to the next level
- * or last level entries pointing to physical memory.
- *
- * tbl: page table address
- * rtbl: pointer to page table or physical memory
- * index: start index to write
- * eindex: end index to write - [index, eindex] written to
- * flags: flags for pagetable entry to or in
- * inc: increment to rtbl between each entry
- * tmp1: temporary variable
- *
- * Preserves: tbl, eindex, flags, inc
- * Corrupts: index, tmp1
- * Returns: rtbl
- */
- .macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1
-.Lpe\@: phys_to_pte \tmp1, \rtbl
- orr \tmp1, \tmp1, \flags // tmp1 = table entry
- str \tmp1, [\tbl, \index, lsl #3]
- add \rtbl, \rtbl, \inc // rtbl = pa next level
- add \index, \index, #1
- cmp \index, \eindex
- b.ls .Lpe\@
- .endm
-
-/*
- * Compute indices of table entries from virtual address range. If multiple entries
- * were needed in the previous page table level then the next page table level is assumed
- * to be composed of multiple pages. (This effectively scales the end index).
- *
- * vstart: virtual address of start of range
- * vend: virtual address of end of range - we map [vstart, vend]
- * shift: shift used to transform virtual address into index
- * order: #imm 2log(number of entries in page table)
- * istart: index in table corresponding to vstart
- * iend: index in table corresponding to vend
- * count: On entry: how many extra entries were required in previous level, scales
- * our end index.
- * On exit: returns how many extra entries required for next page table level
- *
- * Preserves: vstart, vend
- * Returns: istart, iend, count
- */
- .macro compute_indices, vstart, vend, shift, order, istart, iend, count
- ubfx \istart, \vstart, \shift, \order
- ubfx \iend, \vend, \shift, \order
- add \iend, \iend, \count, lsl \order
- sub \count, \iend, \istart
- .endm
-
-/*
- * Map memory for specified virtual address range. Each level of page table needed supports
- * multiple entries. If a level requires n entries the next page table level is assumed to be
- * formed from n pages.
- *
- * tbl: location of page table
- * rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE)
- * vstart: virtual address of start of range
- * vend: virtual address of end of range - we map [vstart, vend - 1]
- * flags: flags to use to map last level entries
- * phys: physical address corresponding to vstart - physical memory is contiguous
- * order: #imm 2log(number of entries in PGD table)
- *
- * If extra_shift is set, an extra level will be populated if the end address does
- * not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range.
- *
- * Temporaries: istart, iend, tmp, count, sv - these need to be different registers
- * Preserves: vstart, flags
- * Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv
- */
- .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift
- sub \vend, \vend, #1
- add \rtbl, \tbl, #PAGE_SIZE
- mov \count, #0
-
- .ifnb \extra_shift
- tst \vend, #~((1 << (\extra_shift)) - 1)
- b.eq .L_\@
- compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count
- mov \sv, \rtbl
- populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
- mov \tbl, \sv
- .endif
-.L_\@:
- compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count
- mov \sv, \rtbl
- populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
- mov \tbl, \sv
-
-#if SWAPPER_PGTABLE_LEVELS > 3
- compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
- mov \sv, \rtbl
- populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
- mov \tbl, \sv
-#endif
-
-#if SWAPPER_PGTABLE_LEVELS > 2
- compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
- mov \sv, \rtbl
- populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
- mov \tbl, \sv
-#endif
-
- compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
- bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1
- populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
- .endm
-
-/*
- * Remap a subregion created with the map_memory macro with modified attributes
- * or output address. The entire remapped region must have been covered in the
- * invocation of map_memory.
- *
- * x0: last level table address (returned in first argument to map_memory)
- * x1: start VA of the existing mapping
- * x2: start VA of the region to update
- * x3: end VA of the region to update (exclusive)
- * x4: start PA associated with the region to update
- * x5: attributes to set on the updated region
- * x6: order of the last level mappings
- */
-SYM_FUNC_START_LOCAL(remap_region)
- sub x3, x3, #1 // make end inclusive
-
- // Get the index offset for the start of the last level table
- lsr x1, x1, x6
- bfi x1, xzr, #0, #PAGE_SHIFT - 3
-
- // Derive the start and end indexes into the last level table
- // associated with the provided region
- lsr x2, x2, x6
- lsr x3, x3, x6
- sub x2, x2, x1
- sub x3, x3, x1
-
- mov x1, #1
- lsl x6, x1, x6 // block size at this level
-
- populate_entries x0, x4, x2, x3, x5, x6, x7
- ret
-SYM_FUNC_END(remap_region)
-
-SYM_FUNC_START_LOCAL(create_idmap)
- mov x28, lr
- /*
- * The ID map carries a 1:1 mapping of the physical address range
- * covered by the loaded image, which could be anywhere in DRAM. This
- * means that the required size of the VA (== PA) space is decided at
- * boot time, and could be more than the configured size of the VA
- * space for ordinary kernel and user space mappings.
- *
- * There are three cases to consider here:
- * - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover
- * the placement of the image. In this case, we configure one extra
- * level of translation on the fly for the ID map only. (This case
- * also covers 42-bit VA/52-bit PA on 64k pages).
- *
- * - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can
- * only happen when using 64k pages, in which case we need to extend
- * the root level table rather than add a level. Note that we can
- * treat this case as 'always extended' as long as we take care not
- * to program an unsupported T0SZ value into the TCR register.
- *
- * - Combinations that would require two additional levels of
- * translation are not supported, e.g., VA_BITS==36 on 16k pages, or
- * VA_BITS==39/4k pages with 5-level paging, where the input address
- * requires more than 47 or 48 bits, respectively.
- */
-#if (VA_BITS < 48)
-#define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT)
-#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
-
- /*
- * If VA_BITS < 48, we have to configure an additional table level.
- * First, we have to verify our assumption that the current value of
- * VA_BITS was chosen such that all translation levels are fully
- * utilised, and that lowering T0SZ will always result in an additional
- * translation level to be configured.
- */
-#if VA_BITS != EXTRA_SHIFT
-#error "Mismatch between VA_BITS and page size/number of translation levels"
-#endif
-#else
-#define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT)
-#define EXTRA_SHIFT
- /*
- * If VA_BITS == 48, we don't have to configure an additional
- * translation level, but the top-level table has more entries.
- */
-#endif
- adrp x0, init_idmap_pg_dir
- adrp x3, _text
- adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
- mov_q x7, SWAPPER_RX_MMUFLAGS
-
- map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT
-
- /* Remap the kernel page tables r/w in the ID map */
- adrp x1, _text
- adrp x2, init_pg_dir
- adrp x3, init_pg_end
- bic x4, x2, #SWAPPER_BLOCK_SIZE - 1
- mov_q x5, SWAPPER_RW_MMUFLAGS
- mov x6, #SWAPPER_BLOCK_SHIFT
- bl remap_region
-
- /* Remap the FDT after the kernel image */
- adrp x1, _text
- adrp x22, _end + SWAPPER_BLOCK_SIZE
- bic x2, x22, #SWAPPER_BLOCK_SIZE - 1
- bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address
- add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
- bic x4, x21, #SWAPPER_BLOCK_SIZE - 1
- mov_q x5, SWAPPER_RW_MMUFLAGS
- mov x6, #SWAPPER_BLOCK_SHIFT
- bl remap_region
-
- /*
- * Since the page tables have been populated with non-cacheable
- * accesses (MMU disabled), invalidate those tables again to
- * remove any speculatively loaded cache lines.
- */
- cbnz x19, 0f // skip cache invalidation if MMU is on
- dmb sy
-
- adrp x0, init_idmap_pg_dir
- adrp x1, init_idmap_pg_end
- bl dcache_inval_poc
-0: ret x28
-SYM_FUNC_END(create_idmap)
-
-SYM_FUNC_START_LOCAL(create_kernel_mapping)
- adrp x0, init_pg_dir
- mov_q x5, KIMAGE_VADDR // compile time __va(_text)
-#ifdef CONFIG_RELOCATABLE
- add x5, x5, x23 // add KASLR displacement
-#endif
- adrp x6, _end // runtime __pa(_end)
- adrp x3, _text // runtime __pa(_text)
- sub x6, x6, x3 // _end - _text
- add x6, x6, x5 // runtime __va(_end)
- mov_q x7, SWAPPER_RW_MMUFLAGS
-
- map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
-
- dsb ishst // sync with page table walker
- ret
-SYM_FUNC_END(create_kernel_mapping)
-
/*
* Initialize CPU registers with task-specific and cpu-specific context.
*
@@ -489,34 +234,9 @@ SYM_FUNC_START_LOCAL(__primary_switched)
mov x0, x20
bl set_cpu_boot_mode_flag
- // Clear BSS
- adr_l x0, __bss_start
- mov x1, xzr
- adr_l x2, __bss_stop
- sub x2, x2, x0
- bl __pi_memset
- dsb ishst // Make zero page visible to PTW
-
-#if VA_BITS > 48
- adr_l x8, vabits_actual // Set this early so KASAN early init
- str x25, [x8] // ... observes the correct value
- dc civac, x8 // Make visible to booting secondaries
-#endif
-
-#ifdef CONFIG_RANDOMIZE_BASE
- adrp x5, memstart_offset_seed // Save KASLR linear map seed
- strh w24, [x5, :lo12:memstart_offset_seed]
-#endif
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
bl kasan_early_init
#endif
- mov x0, x21 // pass FDT address in x0
- bl early_fdt_map // Try mapping the FDT early
- mov x0, x20 // pass the full boot status
- bl init_feature_override // Parse cpu feature overrides
-#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
- bl scs_patch_vmlinux
-#endif
mov x0, x20
bl finalise_el2 // Prefer VHE if possible
ldp x29, x30, [sp], #16
@@ -643,10 +363,13 @@ SYM_FUNC_START_LOCAL(secondary_startup)
* Common entry point for secondary CPUs.
*/
mov x20, x0 // preserve boot mode
+
+#ifdef CONFIG_ARM64_VA_BITS_52
+alternative_if ARM64_HAS_VA52
bl __cpu_secondary_check52bitva
-#if VA_BITS > 48
- ldr_l x0, vabits_actual
+alternative_else_nop_endif
#endif
+
bl __cpu_setup // initialise processor
adrp x1, swapper_pg_dir
adrp x2, idmap_pg_dir
@@ -749,15 +472,18 @@ SYM_FUNC_START(__enable_mmu)
ret
SYM_FUNC_END(__enable_mmu)
+#ifdef CONFIG_ARM64_VA_BITS_52
SYM_FUNC_START(__cpu_secondary_check52bitva)
-#if VA_BITS > 48
- ldr_l x0, vabits_actual
- cmp x0, #52
- b.ne 2f
-
+#ifndef CONFIG_ARM64_LPA2
mrs_s x0, SYS_ID_AA64MMFR2_EL1
and x0, x0, ID_AA64MMFR2_EL1_VARange_MASK
cbnz x0, 2f
+#else
+ mrs x0, id_aa64mmfr0_el1
+ sbfx x0, x0, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4
+ cmp x0, #ID_AA64MMFR0_EL1_TGRAN_LPA2
+ b.ge 2f
+#endif
update_early_cpu_boot_status \
CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_52_BIT_VA, x0, x1
@@ -765,9 +491,9 @@ SYM_FUNC_START(__cpu_secondary_check52bitva)
wfi
b 1b
-#endif
2: ret
SYM_FUNC_END(__cpu_secondary_check52bitva)
+#endif
SYM_FUNC_START_LOCAL(__no_granule_support)
/* Indicate that this CPU can't boot and is stuck in the kernel */
@@ -779,123 +505,18 @@ SYM_FUNC_START_LOCAL(__no_granule_support)
b 1b
SYM_FUNC_END(__no_granule_support)
-#ifdef CONFIG_RELOCATABLE
-SYM_FUNC_START_LOCAL(__relocate_kernel)
- /*
- * Iterate over each entry in the relocation table, and apply the
- * relocations in place.
- */
- adr_l x9, __rela_start
- adr_l x10, __rela_end
- mov_q x11, KIMAGE_VADDR // default virtual offset
- add x11, x11, x23 // actual virtual offset
-
-0: cmp x9, x10
- b.hs 1f
- ldp x12, x13, [x9], #24
- ldr x14, [x9, #-8]
- cmp w13, #R_AARCH64_RELATIVE
- b.ne 0b
- add x14, x14, x23 // relocate
- str x14, [x12, x23]
- b 0b
-
-1:
-#ifdef CONFIG_RELR
- /*
- * Apply RELR relocations.
- *
- * RELR is a compressed format for storing relative relocations. The
- * encoded sequence of entries looks like:
- * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
- *
- * i.e. start with an address, followed by any number of bitmaps. The
- * address entry encodes 1 relocation. The subsequent bitmap entries
- * encode up to 63 relocations each, at subsequent offsets following
- * the last address entry.
- *
- * The bitmap entries must have 1 in the least significant bit. The
- * assumption here is that an address cannot have 1 in lsb. Odd
- * addresses are not supported. Any odd addresses are stored in the RELA
- * section, which is handled above.
- *
- * Excluding the least significant bit in the bitmap, each non-zero
- * bit in the bitmap represents a relocation to be applied to
- * a corresponding machine word that follows the base address
- * word. The second least significant bit represents the machine
- * word immediately following the initial address, and each bit
- * that follows represents the next word, in linear order. As such,
- * a single bitmap can encode up to 63 relocations in a 64-bit object.
- *
- * In this implementation we store the address of the next RELR table
- * entry in x9, the address being relocated by the current address or
- * bitmap entry in x13 and the address being relocated by the current
- * bit in x14.
- */
- adr_l x9, __relr_start
- adr_l x10, __relr_end
-
-2: cmp x9, x10
- b.hs 7f
- ldr x11, [x9], #8
- tbnz x11, #0, 3f // branch to handle bitmaps
- add x13, x11, x23
- ldr x12, [x13] // relocate address entry
- add x12, x12, x23
- str x12, [x13], #8 // adjust to start of bitmap
- b 2b
-
-3: mov x14, x13
-4: lsr x11, x11, #1
- cbz x11, 6f
- tbz x11, #0, 5f // skip bit if not set
- ldr x12, [x14] // relocate bit
- add x12, x12, x23
- str x12, [x14]
-
-5: add x14, x14, #8 // move to next bit's address
- b 4b
-
-6: /*
- * Move to the next bitmap's address. 8 is the word size, and 63 is the
- * number of significant bits in a bitmap entry.
- */
- add x13, x13, #(8 * 63)
- b 2b
-
-7:
-#endif
- ret
-
-SYM_FUNC_END(__relocate_kernel)
-#endif
-
SYM_FUNC_START_LOCAL(__primary_switch)
adrp x1, reserved_pg_dir
adrp x2, init_idmap_pg_dir
bl __enable_mmu
-#ifdef CONFIG_RELOCATABLE
- adrp x23, KERNEL_START
- and x23, x23, MIN_KIMG_ALIGN - 1
-#ifdef CONFIG_RANDOMIZE_BASE
- mov x0, x22
- adrp x1, init_pg_end
+
+ adrp x1, early_init_stack
mov sp, x1
mov x29, xzr
- bl __pi_kaslr_early_init
- and x24, x0, #SZ_2M - 1 // capture memstart offset seed
- bic x0, x0, #SZ_2M - 1
- orr x23, x23, x0 // record kernel offset
-#endif
-#endif
- bl clear_page_tables
- bl create_kernel_mapping
+ mov x0, x20 // pass the full boot status
+ mov x1, x21 // pass the FDT
+ bl __pi_early_map_kernel // Map and relocate the kernel
- adrp x1, init_pg_dir
- load_ttbr1 x1, x1, x2
-#ifdef CONFIG_RELOCATABLE
- bl __relocate_kernel
-#endif
ldr x8, =__primary_switched
adrp x0, KERNEL_START // __pa(KERNEL_START)
br x8
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index e931ce078a00..ba4f8f7d6a91 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -36,7 +36,40 @@ PROVIDE(__pi___memcpy = __pi_memcpy);
PROVIDE(__pi___memmove = __pi_memmove);
PROVIDE(__pi___memset = __pi_memset);
-PROVIDE(__pi_vabits_actual = vabits_actual);
+PROVIDE(__pi_id_aa64isar1_override = id_aa64isar1_override);
+PROVIDE(__pi_id_aa64isar2_override = id_aa64isar2_override);
+PROVIDE(__pi_id_aa64mmfr0_override = id_aa64mmfr0_override);
+PROVIDE(__pi_id_aa64mmfr1_override = id_aa64mmfr1_override);
+PROVIDE(__pi_id_aa64mmfr2_override = id_aa64mmfr2_override);
+PROVIDE(__pi_id_aa64pfr0_override = id_aa64pfr0_override);
+PROVIDE(__pi_id_aa64pfr1_override = id_aa64pfr1_override);
+PROVIDE(__pi_id_aa64smfr0_override = id_aa64smfr0_override);
+PROVIDE(__pi_id_aa64zfr0_override = id_aa64zfr0_override);
+PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override);
+PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings);
+#ifdef CONFIG_CAVIUM_ERRATUM_27456
+PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus);
+#endif
+PROVIDE(__pi__ctype = _ctype);
+PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed);
+
+PROVIDE(__pi_init_idmap_pg_dir = init_idmap_pg_dir);
+PROVIDE(__pi_init_idmap_pg_end = init_idmap_pg_end);
+PROVIDE(__pi_init_pg_dir = init_pg_dir);
+PROVIDE(__pi_init_pg_end = init_pg_end);
+PROVIDE(__pi_swapper_pg_dir = swapper_pg_dir);
+
+PROVIDE(__pi__text = _text);
+PROVIDE(__pi__stext = _stext);
+PROVIDE(__pi__etext = _etext);
+PROVIDE(__pi___start_rodata = __start_rodata);
+PROVIDE(__pi___inittext_begin = __inittext_begin);
+PROVIDE(__pi___inittext_end = __inittext_end);
+PROVIDE(__pi___initdata_begin = __initdata_begin);
+PROVIDE(__pi___initdata_end = __initdata_end);
+PROVIDE(__pi__data = _data);
+PROVIDE(__pi___bss_start = __bss_start);
+PROVIDE(__pi__end = _end);
#ifdef CONFIG_KVM
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 12c7f3c8ba76..1da3e25f9d9e 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -16,9 +16,7 @@ bool __ro_after_init __kaslr_is_enabled = false;
void __init kaslr_init(void)
{
- if (cpuid_feature_extract_unsigned_field(arm64_sw_feature_override.val &
- arm64_sw_feature_override.mask,
- ARM64_SW_FEATURE_OVERRIDE_NOKASLR)) {
+ if (kaslr_disabled_cmdline()) {
pr_info("KASLR disabled on command line\n");
return;
}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index dd851297596e..47e0be610bb6 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -595,7 +595,7 @@ int module_finalize(const Elf_Ehdr *hdr,
if (scs_is_dynamic()) {
s = find_section(hdr, sechdrs, ".init.eh_frame");
if (s)
- scs_patch((void *)s->sh_addr, s->sh_size);
+ __pi_scs_patch((void *)s->sh_addr, s->sh_size);
}
return module_init_ftrace_plt(hdr, sechdrs, me);
diff --git a/arch/arm64/kernel/pi/.gitignore b/arch/arm64/kernel/pi/.gitignore
new file mode 100644
index 000000000000..efb29b663e85
--- /dev/null
+++ b/arch/arm64/kernel/pi/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+relacheck
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
index c844a0546d7f..4393b41f0b71 100644
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -11,6 +11,9 @@ KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
-fno-asynchronous-unwind-tables -fno-unwind-tables \
$(call cc-option,-fno-addrsig)
+# this code may run with the MMU off so disable unaligned accesses
+CFLAGS_map_range.o += -mstrict-align
+
# remove SCS flags from all objects in this directory
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
# disable LTO
@@ -22,14 +25,26 @@ KCSAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
+hostprogs := relacheck
+
+quiet_cmd_piobjcopy = $(quiet_cmd_objcopy)
+ cmd_piobjcopy = $(cmd_objcopy) && $(obj)/relacheck $(@) $(<)
+
$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
- --remove-section=.note.gnu.property \
- --prefix-alloc-sections=.init
-$(obj)/%.pi.o: $(obj)/%.o FORCE
- $(call if_changed,objcopy)
+ --remove-section=.note.gnu.property
+$(obj)/%.pi.o: $(obj)/%.o $(obj)/relacheck FORCE
+ $(call if_changed,piobjcopy)
+
+# ensure that all the lib- code ends up as __init code and data
+$(obj)/lib-%.pi.o: OBJCOPYFLAGS += --prefix-alloc-sections=.init
$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
$(call if_changed_rule,cc_o_c)
-obj-y := kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
-extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
+obj-y := idreg-override.pi.o \
+ map_kernel.pi.o map_range.pi.o \
+ lib-fdt.pi.o lib-fdt_ro.pi.o
+obj-$(CONFIG_RELOCATABLE) += relocate.pi.o
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_early.pi.o
+obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.pi.o
+extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c
index e30fd9e32ef3..bccfee34f62f 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/pi/idreg-override.c
@@ -14,6 +14,8 @@
#include <asm/cpufeature.h>
#include <asm/setup.h>
+#include "pi.h"
+
#define FTR_DESC_NAME_LEN 20
#define FTR_DESC_FIELD_LEN 10
#define FTR_ALIAS_NAME_LEN 30
@@ -21,15 +23,6 @@
static u64 __boot_status __initdata;
-// temporary __prel64 related definitions
-// to be removed when this code is moved under pi/
-
-#define __prel64_initconst __initconst
-
-#define PREL64(type, name) union { type *name; }
-
-#define prel64_pointer(__d) (__d)
-
typedef bool filter_t(u64 val);
struct ftr_set_desc {
@@ -66,6 +59,35 @@ static const struct ftr_set_desc mmfr1 __prel64_initconst = {
},
};
+
+static bool __init mmfr2_varange_filter(u64 val)
+{
+ int __maybe_unused feat;
+
+ if (val)
+ return false;
+
+#ifdef CONFIG_ARM64_LPA2
+ feat = cpuid_feature_extract_signed_field(read_sysreg(id_aa64mmfr0_el1),
+ ID_AA64MMFR0_EL1_TGRAN_SHIFT);
+ if (feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2) {
+ id_aa64mmfr0_override.val |=
+ (ID_AA64MMFR0_EL1_TGRAN_LPA2 - 1) << ID_AA64MMFR0_EL1_TGRAN_SHIFT;
+ id_aa64mmfr0_override.mask |= 0xfU << ID_AA64MMFR0_EL1_TGRAN_SHIFT;
+ }
+#endif
+ return true;
+}
+
+static const struct ftr_set_desc mmfr2 __prel64_initconst = {
+ .name = "id_aa64mmfr2",
+ .override = &id_aa64mmfr2_override,
+ .fields = {
+ FIELD("varange", ID_AA64MMFR2_EL1_VARange_SHIFT, mmfr2_varange_filter),
+ {}
+ },
+};
+
static bool __init pfr0_sve_filter(u64 val)
{
/*
@@ -166,6 +188,8 @@ static const struct ftr_set_desc sw_features __prel64_initconst = {
.fields = {
FIELD("nokaslr", ARM64_SW_FEATURE_OVERRIDE_NOKASLR, NULL),
FIELD("hvhe", ARM64_SW_FEATURE_OVERRIDE_HVHE, hvhe_filter),
+ FIELD("rodataoff", ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF, NULL),
+ FIELD("nowxn", ARM64_SW_FEATURE_OVERRIDE_NOWXN, NULL),
{}
},
};
@@ -173,6 +197,7 @@ static const struct ftr_set_desc sw_features __prel64_initconst = {
static const
PREL64(const struct ftr_set_desc, reg) regs[] __prel64_initconst = {
{ &mmfr1 },
+ { &mmfr2 },
{ &pfr0 },
{ &pfr1 },
{ &isar1 },
@@ -197,6 +222,9 @@ static const struct {
{ "arm64.nomops", "id_aa64isar2.mops=0" },
{ "arm64.nomte", "id_aa64pfr1.mte=0" },
{ "nokaslr", "arm64_sw.nokaslr=1" },
+ { "rodata=off", "arm64_sw.rodataoff=1 arm64_sw.nowxn=1" },
+ { "arm64.nolva", "id_aa64mmfr2.varange=0" },
+ { "arm64.nowxn", "arm64_sw.nowxn=1" },
};
static int __init parse_hexdigit(const char *p, u64 *v)
@@ -313,42 +341,35 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
} while (1);
}
-static __init const u8 *get_bootargs_cmdline(void)
+static __init const u8 *get_bootargs_cmdline(const void *fdt, int node)
{
+ static char const bootargs[] __initconst = "bootargs";
const u8 *prop;
- void *fdt;
- int node;
- fdt = get_early_fdt_ptr();
- if (!fdt)
- return NULL;
-
- node = fdt_path_offset(fdt, "/chosen");
if (node < 0)
return NULL;
- prop = fdt_getprop(fdt, node, "bootargs", NULL);
+ prop = fdt_getprop(fdt, node, bootargs, NULL);
if (!prop)
return NULL;
return strlen(prop) ? prop : NULL;
}
-static __init void parse_cmdline(void)
+static __init void parse_cmdline(const void *fdt, int chosen)
{
- const u8 *prop = get_bootargs_cmdline();
+ static char const cmdline[] __initconst = CONFIG_CMDLINE;
+ const u8 *prop = get_bootargs_cmdline(fdt, chosen);
if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop)
- __parse_cmdline(CONFIG_CMDLINE, true);
+ __parse_cmdline(cmdline, true);
if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop)
__parse_cmdline(prop, true);
}
-/* Keep checkers quiet */
-void init_feature_override(u64 boot_status);
-
-asmlinkage void __init init_feature_override(u64 boot_status)
+void __init init_feature_override(u64 boot_status, const void *fdt,
+ int chosen)
{
struct arm64_ftr_override *override;
const struct ftr_set_desc *reg;
@@ -364,7 +385,7 @@ asmlinkage void __init init_feature_override(u64 boot_status)
__boot_status = boot_status;
- parse_cmdline();
+ parse_cmdline(fdt, chosen);
for (i = 0; i < ARRAY_SIZE(regs); i++) {
reg = prel64_pointer(regs[i].reg);
@@ -373,3 +394,10 @@ asmlinkage void __init init_feature_override(u64 boot_status)
(unsigned long)(override + 1));
}
}
+
+char * __init skip_spaces(const char *str)
+{
+ while (isspace(*str))
+ ++str;
+ return (char *)str;
+}
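
Note (not part of the patch): each feature override parsed here is just a (val, mask) pair that is later folded into the sanitised ID register value; "arm64.nolva", for example, is aliased to "id_aa64mmfr2.varange=0", which masks the VARange field and forces it to zero. A hypothetical, simplified userspace sketch of that folding step (names and field layout are illustrative, not the kernel's exact code):

#include <stdint.h>
#include <stdio.h>

/* Bits covered by the override mask take their value from the override. */
static uint64_t apply_ftr_override(uint64_t regval, uint64_t ovr_val, uint64_t ovr_mask)
{
	return (regval & ~ovr_mask) | (ovr_val & ovr_mask);
}

int main(void)
{
	uint64_t mmfr2    = 1ULL << 16;		/* VARange (bits [19:16]) == 1: 52-bit VA supported */
	uint64_t ovr_val  = 0;			/* "varange=0" */
	uint64_t ovr_mask = 0xfULL << 16;	/* cover the 4-bit VARange field */

	/* prints 0: the override hides 52-bit VA support from later code */
	printf("%#llx\n", (unsigned long long)apply_ftr_override(mmfr2, ovr_val, ovr_mask));
	return 0;
}
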
diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c
index b9e0bb4bc6a9..0257b43819db 100644
--- a/arch/arm64/kernel/pi/kaslr_early.c
+++ b/arch/arm64/kernel/pi/kaslr_early.c
@@ -16,68 +16,21 @@
#include <asm/memory.h>
#include <asm/pgtable.h>
-/* taken from lib/string.c */
-static char *__strstr(const char *s1, const char *s2)
-{
- size_t l1, l2;
-
- l2 = strlen(s2);
- if (!l2)
- return (char *)s1;
- l1 = strlen(s1);
- while (l1 >= l2) {
- l1--;
- if (!memcmp(s1, s2, l2))
- return (char *)s1;
- s1++;
- }
- return NULL;
-}
-static bool cmdline_contains_nokaslr(const u8 *cmdline)
-{
- const u8 *str;
-
- str = __strstr(cmdline, "nokaslr");
- return str == cmdline || (str > cmdline && *(str - 1) == ' ');
-}
-
-static bool is_kaslr_disabled_cmdline(void *fdt)
-{
- if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
- int node;
- const u8 *prop;
-
- node = fdt_path_offset(fdt, "/chosen");
- if (node < 0)
- goto out;
-
- prop = fdt_getprop(fdt, node, "bootargs", NULL);
- if (!prop)
- goto out;
-
- if (cmdline_contains_nokaslr(prop))
- return true;
+#include "pi.h"
- if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
- goto out;
+extern u16 memstart_offset_seed;
- return false;
- }
-out:
- return cmdline_contains_nokaslr(CONFIG_CMDLINE);
-}
-
-static u64 get_kaslr_seed(void *fdt)
+static u64 __init get_kaslr_seed(void *fdt, int node)
{
- int node, len;
+ static char const seed_str[] __initconst = "kaslr-seed";
fdt64_t *prop;
u64 ret;
+ int len;
- node = fdt_path_offset(fdt, "/chosen");
if (node < 0)
return 0;
- prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+ prop = fdt_getprop_w(fdt, node, seed_str, &len);
if (!prop || len != sizeof(u64))
return 0;
@@ -86,20 +39,22 @@ static u64 get_kaslr_seed(void *fdt)
return ret;
}
-asmlinkage u64 kaslr_early_init(void *fdt)
+u64 __init kaslr_early_init(void *fdt, int chosen)
{
u64 seed, range;
- if (is_kaslr_disabled_cmdline(fdt))
+ if (kaslr_disabled_cmdline())
return 0;
- seed = get_kaslr_seed(fdt);
+ seed = get_kaslr_seed(fdt, chosen);
if (!seed) {
if (!__early_cpu_has_rndr() ||
!__arm64_rndr((unsigned long *)&seed))
return 0;
}
+ memstart_offset_seed = seed & U16_MAX;
+
/*
* OK, so we are proceeding with KASLR enabled. Calculate a suitable
* kernel image offset from the seed. Let's place the kernel in the
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
new file mode 100644
index 000000000000..cac1e1f63c44
--- /dev/null
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2023 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+#include <linux/init.h>
+#include <linux/libfdt.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+#include <asm/memory.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+#include "pi.h"
+
+extern const u8 __eh_frame_start[], __eh_frame_end[];
+
+extern void idmap_cpu_replace_ttbr1(void *pgdir);
+
+static void __init map_segment(pgd_t *pg_dir, u64 *pgd, u64 va_offset,
+ void *start, void *end, pgprot_t prot,
+ bool may_use_cont, int root_level)
+{
+ map_range(pgd, ((u64)start + va_offset) & ~PAGE_OFFSET,
+ ((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
+ prot, root_level, (pte_t *)pg_dir, may_use_cont, 0);
+}
+
+static void __init unmap_segment(pgd_t *pg_dir, u64 va_offset, void *start,
+ void *end, int root_level)
+{
+ map_segment(pg_dir, NULL, va_offset, start, end, __pgprot(0),
+ false, root_level);
+}
+
+static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
+{
+ bool enable_scs = IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS);
+ bool twopass = IS_ENABLED(CONFIG_RELOCATABLE);
+ u64 pgdp = (u64)init_pg_dir + PAGE_SIZE;
+ pgprot_t text_prot = PAGE_KERNEL_ROX;
+ pgprot_t data_prot = PAGE_KERNEL;
+ pgprot_t prot;
+
+ /*
+ * External debuggers may need to write directly to the text mapping to
+ * install SW breakpoints. Allow this (only) when explicitly requested
+ * with rodata=off.
+ */
+ if (arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF))
+ text_prot = PAGE_KERNEL_EXEC;
+
+ /*
+ * We only enable the shadow call stack dynamically if we are running
+ * on a system that does not implement PAC or BTI. PAC and SCS provide
+ * roughly the same level of protection, and BTI relies on the PACIASP
+ * instructions serving as landing pads, preventing us from patching
+ * those instructions into something else.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) && cpu_has_pac())
+ enable_scs = false;
+
+ if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && cpu_has_bti()) {
+ enable_scs = false;
+
+ /*
+ * If we have a CPU that supports BTI and a kernel built for
+ * BTI then mark the kernel executable text as guarded pages
+ * now so we don't have to rewrite the page tables later.
+ */
+ text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
+ }
+
+ /* Map all code read-write on the first pass if needed */
+ twopass |= enable_scs;
+ prot = twopass ? data_prot : text_prot;
+
+ map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
+ !twopass, root_level);
+ map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
+ __inittext_begin, data_prot, false, root_level);
+ map_segment(init_pg_dir, &pgdp, va_offset, __inittext_begin,
+ __inittext_end, prot, false, root_level);
+ map_segment(init_pg_dir, &pgdp, va_offset, __initdata_begin,
+ __initdata_end, data_prot, false, root_level);
+ map_segment(init_pg_dir, &pgdp, va_offset, _data, _end, data_prot,
+ true, root_level);
+ dsb(ishst);
+
+ idmap_cpu_replace_ttbr1(init_pg_dir);
+
+ if (twopass) {
+ if (IS_ENABLED(CONFIG_RELOCATABLE))
+ relocate_kernel(kaslr_offset);
+
+ if (enable_scs) {
+ scs_patch(__eh_frame_start + va_offset,
+ __eh_frame_end - __eh_frame_start);
+ asm("ic ialluis");
+
+ dynamic_scs_is_enabled = true;
+ }
+
+ /*
+ * Unmap the text region before remapping it, to avoid
+ * potential TLB conflicts when creating the contiguous
+ * descriptors.
+ */
+ unmap_segment(init_pg_dir, va_offset, _stext, _etext,
+ root_level);
+ dsb(ishst);
+ isb();
+ __tlbi(vmalle1);
+ isb();
+
+ /*
+ * Remap these segments with different permissions
+ * No new page table allocations should be needed
+ */
+ map_segment(init_pg_dir, NULL, va_offset, _stext, _etext,
+ text_prot, true, root_level);
+ map_segment(init_pg_dir, NULL, va_offset, __inittext_begin,
+ __inittext_end, text_prot, false, root_level);
+ }
+
+ /* Copy the root page table to its final location */
+ memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PAGE_SIZE);
+ dsb(ishst);
+ idmap_cpu_replace_ttbr1(swapper_pg_dir);
+}
+
+static void noinline __section(".idmap.text") disable_wxn(void)
+{
+ u64 sctlr = read_sysreg(sctlr_el1) & ~SCTLR_ELx_WXN;
+
+ /*
+ * We cannot safely clear the WXN bit while the MMU and caches are on,
+ * so turn the MMU off, flush the TLBs and turn it on again but with
+ * the WXN bit cleared this time.
+ */
+ asm(" msr sctlr_el1, %0 ;"
+ " isb ;"
+ " tlbi vmalle1 ;"
+ " dsb nsh ;"
+ " isb ;"
+ " msr sctlr_el1, %1 ;"
+ " isb ;"
+ :: "r"(sctlr & ~SCTLR_ELx_M), "r"(sctlr));
+}
+
+static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr)
+{
+ u64 sctlr = read_sysreg(sctlr_el1);
+ u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
+
+ asm(" msr sctlr_el1, %0 ;"
+ " isb ;"
+ " msr ttbr0_el1, %1 ;"
+ " msr tcr_el1, %2 ;"
+ " isb ;"
+ " tlbi vmalle1 ;"
+ " dsb nsh ;"
+ " isb ;"
+ " msr sctlr_el1, %3 ;"
+ " isb ;"
+ :: "r"(sctlr & ~SCTLR_ELx_M), "r"(ttbr), "r"(tcr), "r"(sctlr));
+}
+
+static void __init remap_idmap_for_lpa2(void)
+{
+ /* clear the bits that change meaning once LPA2 is turned on */
+ pteval_t mask = PTE_SHARED;
+
+ /*
+ * We have to clear bits [9:8] in all block or page descriptors in the
+ * initial ID map, as otherwise they will be (mis)interpreted as
+ * physical address bits once we flick the LPA2 switch (TCR.DS). Since
+ * we cannot manipulate live descriptors in that way without creating
+ * potential TLB conflicts, let's create another temporary ID map in a
+ * LPA2 compatible fashion, and update the initial ID map while running
+ * from that.
+ */
+ create_init_idmap(init_pg_dir, mask);
+ dsb(ishst);
+ set_ttbr0_for_lpa2((u64)init_pg_dir);
+
+ /*
+ * Recreate the initial ID map with the same granularity as before.
+ * Don't bother with the FDT, we no longer need it after this.
+ */
+ memset(init_idmap_pg_dir, 0,
+ (u64)init_idmap_pg_end - (u64)init_idmap_pg_dir);
+
+ create_init_idmap(init_idmap_pg_dir, mask);
+ dsb(ishst);
+
+ /* switch back to the updated initial ID map */
+ set_ttbr0_for_lpa2((u64)init_idmap_pg_dir);
+
+ /* wipe the temporary ID map from memory */
+ memset(init_pg_dir, 0, (u64)init_pg_end - (u64)init_pg_dir);
+}
+
+static void __init map_fdt(u64 fdt)
+{
+ static u8 ptes[INIT_IDMAP_FDT_SIZE] __initdata __aligned(PAGE_SIZE);
+ u64 efdt = fdt + MAX_FDT_SIZE;
+ u64 ptep = (u64)ptes;
+
+ /*
+ * Map up to MAX_FDT_SIZE bytes, but avoid overlap with
+ * the kernel image.
+ */
+ map_range(&ptep, fdt, (u64)_text > fdt ? min((u64)_text, efdt) : efdt,
+ fdt, PAGE_KERNEL, IDMAP_ROOT_LEVEL,
+ (pte_t *)init_idmap_pg_dir, false, 0);
+ dsb(ishst);
+}
+
+asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
+{
+ static char const chosen_str[] __initconst = "/chosen";
+ u64 va_base, pa_base = (u64)&_text;
+ u64 kaslr_offset = pa_base % MIN_KIMG_ALIGN;
+ int root_level = 4 - CONFIG_PGTABLE_LEVELS;
+ int va_bits = VA_BITS;
+ int chosen;
+
+ map_fdt((u64)fdt);
+
+ /* Clear BSS and the initial page tables */
+ memset(__bss_start, 0, (u64)init_pg_end - (u64)__bss_start);
+
+ /* Parse the command line for CPU feature overrides */
+ chosen = fdt_path_offset(fdt, chosen_str);
+ init_feature_override(boot_status, fdt, chosen);
+
+ if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && !cpu_has_lva()) {
+ va_bits = VA_BITS_MIN;
+ } else if (IS_ENABLED(CONFIG_ARM64_LPA2) && !cpu_has_lpa2()) {
+ va_bits = VA_BITS_MIN;
+ root_level++;
+ }
+
+ if (va_bits > VA_BITS_MIN)
+ sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(va_bits));
+
+ if (IS_ENABLED(CONFIG_ARM64_WXN) &&
+ arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_NOWXN))
+ disable_wxn();
+
+ /*
+ * The virtual KASLR displacement modulo 2MiB is decided by the
+ * physical placement of the image, as otherwise, we might not be able
+ * to create the early kernel mapping using 2 MiB block descriptors. So
+ * take the low bits of the KASLR offset from the physical address, and
+ * fill in the high bits from the seed.
+ */
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+ u64 kaslr_seed = kaslr_early_init(fdt, chosen);
+
+ if (kaslr_seed && kaslr_requires_kpti())
+ arm64_use_ng_mappings = true;
+
+ kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
+ }
+
+ if (IS_ENABLED(CONFIG_ARM64_LPA2) && va_bits > VA_BITS_MIN)
+ remap_idmap_for_lpa2();
+
+ va_base = KIMAGE_VADDR + kaslr_offset;
+ map_kernel(kaslr_offset, va_base - pa_base, root_level);
+}
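
Note (not part of the patch): the comment above early_map_kernel() describes how the final KASLR displacement is assembled — the low bits, modulo MIN_KIMG_ALIGN (2 MiB), track the physical placement of the image, and the high bits come from the seed. A standalone illustration with made-up numbers:

#include <stdint.h>
#include <stdio.h>

#define MIN_KIMG_ALIGN	(2ULL << 20)	/* 2 MiB, as on arm64 */

int main(void)
{
	uint64_t pa_base = 0x41280000;		/* hypothetical physical load address */
	uint64_t seed    = 0x0000004d3f200000;	/* hypothetical KASLR seed */

	/* low bits of the displacement follow the physical placement ... */
	uint64_t kaslr_offset = pa_base % MIN_KIMG_ALIGN;

	/* ... and the high bits come from the seed */
	kaslr_offset |= seed & ~(MIN_KIMG_ALIGN - 1);

	printf("kaslr_offset = %#llx\n", (unsigned long long)kaslr_offset);
	return 0;
}
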
diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
new file mode 100644
index 000000000000..5410b2cac590
--- /dev/null
+++ b/arch/arm64/kernel/pi/map_range.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2023 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+#include <linux/types.h>
+#include <linux/sizes.h>
+
+#include <asm/memory.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+
+#include "pi.h"
+
+/**
+ * map_range - Map a contiguous range of physical pages into virtual memory
+ *
+ * @pte: Address of physical pointer to array of pages to
+ * allocate page tables from
+ * @start: Virtual address of the start of the range
+ * @end: Virtual address of the end of the range (exclusive)
+ * @pa: Physical address of the start of the range
+ * @prot: Access permissions of the range
+ * @level: Translation level for the mapping
+ * @tbl: The level @level page table to create the mappings in
+ * @may_use_cont: Whether the use of the contiguous attribute is allowed
+ * @va_offset: Offset between a physical page and its current mapping
+ * in the VA space
+ */
+void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
+ int level, pte_t *tbl, bool may_use_cont, u64 va_offset)
+{
+ u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
+ u64 protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
+ int lshift = (3 - level) * (PAGE_SHIFT - 3);
+ u64 lmask = (PAGE_SIZE << lshift) - 1;
+
+ start &= PAGE_MASK;
+ pa &= PAGE_MASK;
+
+ /* Advance tbl to the entry that covers start */
+ tbl += (start >> (lshift + PAGE_SHIFT)) % PTRS_PER_PTE;
+
+ /*
+ * Set the right block/page bits for this level unless we are
+ * clearing the mapping
+ */
+ if (protval)
+ protval |= (level < 3) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
+
+ while (start < end) {
+ u64 next = min((start | lmask) + 1, PAGE_ALIGN(end));
+
+ if (level < 3 && (start | next | pa) & lmask) {
+ /*
+ * This chunk needs a finer grained mapping. Create a
+ * table mapping if necessary and recurse.
+ */
+ if (pte_none(*tbl)) {
+ *tbl = __pte(__phys_to_pte_val(*pte) |
+ PMD_TYPE_TABLE | PMD_TABLE_UXN);
+ *pte += PTRS_PER_PTE * sizeof(pte_t);
+ }
+ map_range(pte, start, next, pa, prot, level + 1,
+ (pte_t *)(__pte_to_phys(*tbl) + va_offset),
+ may_use_cont, va_offset);
+ } else {
+ /*
+ * Start a contiguous range if start and pa are
+ * suitably aligned
+ */
+ if (((start | pa) & cmask) == 0 && may_use_cont)
+ protval |= PTE_CONT;
+
+ /*
+ * Clear the contiguous attribute if the remaining
+ * range does not cover a contiguous block
+ */
+ if ((end & ~cmask) <= start)
+ protval &= ~PTE_CONT;
+
+ /* Put down a block or page mapping */
+ *tbl = __pte(__phys_to_pte_val(pa) | protval);
+ }
+ pa += next - start;
+ start = next;
+ tbl++;
+ }
+}
+
+asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
+{
+ u64 ptep = (u64)pg_dir + PAGE_SIZE;
+ pgprot_t text_prot = PAGE_KERNEL_ROX;
+ pgprot_t data_prot = PAGE_KERNEL;
+
+ pgprot_val(text_prot) &= ~clrmask;
+ pgprot_val(data_prot) &= ~clrmask;
+
+ map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
+ text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
+ map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
+ data_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
+
+ return ptep;
+}
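
Note (not part of the patch): the shift/mask arithmetic in map_range() is compact; assuming a 4K granule (PAGE_SHIFT == 12), the per-level block sizes it produces work out as in this standalone illustration:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

int main(void)
{
	for (int level = 0; level <= 3; level++) {
		int lshift = (3 - level) * (PAGE_SHIFT - 3);
		unsigned long block = PAGE_SIZE << lshift;	/* block/page size at this level */

		/* level 3: 4 KiB pages, level 2: 2 MiB, level 1: 1 GiB, level 0: 512 GiB */
		printf("level %d: lshift %2d, block %lu KiB\n", level, lshift, block >> 10);
	}
	return 0;
}
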
diff --git a/arch/arm64/kernel/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c
index a1fe4b4ff591..49d8b40e61bc 100644
--- a/arch/arm64/kernel/patch-scs.c
+++ b/arch/arm64/kernel/pi/patch-scs.c
@@ -4,16 +4,17 @@
* Author: Ard Biesheuvel <ardb@google.com>
*/
-#include <linux/bug.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/linkage.h>
-#include <linux/printk.h>
#include <linux/types.h>
-#include <asm/cacheflush.h>
#include <asm/scs.h>
+#include "pi.h"
+
+bool dynamic_scs_is_enabled;
+
//
// This minimal DWARF CFI parser is partially based on the code in
// arch/arc/kernel/unwind.c, and on the document below:
@@ -49,8 +50,6 @@
#define DW_CFA_GNU_negative_offset_extended 0x2f
#define DW_CFA_hi_user 0x3f
-extern const u8 __eh_frame_start[], __eh_frame_end[];
-
enum {
PACIASP = 0xd503233f,
AUTIASP = 0xd50323bf,
@@ -81,7 +80,11 @@ static void __always_inline scs_patch_loc(u64 loc)
*/
return;
}
- dcache_clean_pou(loc, loc + sizeof(u32));
+ if (IS_ENABLED(CONFIG_ARM64_WORKAROUND_CLEAN_CACHE))
+ asm("dc civac, %0" :: "r"(loc));
+ else
+ asm(ALTERNATIVE("dc cvau, %0", "nop", ARM64_HAS_CACHE_IDC)
+ :: "r"(loc));
}
/*
@@ -128,10 +131,10 @@ struct eh_frame {
};
};
-static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
- bool fde_has_augmentation_data,
- int code_alignment_factor,
- bool dry_run)
+static int scs_handle_fde_frame(const struct eh_frame *frame,
+ bool fde_has_augmentation_data,
+ int code_alignment_factor,
+ bool dry_run)
{
int size = frame->size - offsetof(struct eh_frame, opcodes) + 4;
u64 loc = (u64)offset_to_ptr(&frame->initial_loc);
@@ -198,14 +201,13 @@ static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
break;
default:
- pr_err("unhandled opcode: %02x in FDE frame %lx\n", opcode[-1], (uintptr_t)frame);
return -ENOEXEC;
}
}
return 0;
}
-int noinstr scs_patch(const u8 eh_frame[], int size)
+int scs_patch(const u8 eh_frame[], int size)
{
const u8 *p = eh_frame;
@@ -250,13 +252,3 @@ int noinstr scs_patch(const u8 eh_frame[], int size)
}
return 0;
}
-
-asmlinkage void __init scs_patch_vmlinux(void)
-{
- if (!should_patch_pac_into_scs())
- return;
-
- WARN_ON(scs_patch(__eh_frame_start, __eh_frame_end - __eh_frame_start));
- icache_inval_all_pou();
- isb();
-}
diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
new file mode 100644
index 000000000000..c91e5e965cd3
--- /dev/null
+++ b/arch/arm64/kernel/pi/pi.h
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2023 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+#include <linux/types.h>
+
+#define __prel64_initconst __section(".init.rodata.prel64")
+
+#define PREL64(type, name) union { type *name; prel64_t name ## _prel; }
+
+#define prel64_pointer(__d) (typeof(__d))prel64_to_pointer(&__d##_prel)
+
+typedef volatile signed long prel64_t;
+
+static inline void *prel64_to_pointer(const prel64_t *offset)
+{
+ if (!*offset)
+ return NULL;
+ return (void *)offset + *offset;
+}
+
+extern bool dynamic_scs_is_enabled;
+
+extern pgd_t init_idmap_pg_dir[], init_idmap_pg_end[];
+
+void init_feature_override(u64 boot_status, const void *fdt, int chosen);
+u64 kaslr_early_init(void *fdt, int chosen);
+void relocate_kernel(u64 offset);
+int scs_patch(const u8 eh_frame[], int size);
+
+void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
+ int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
+
+asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
+
+asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
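
Note (not part of the patch): the PREL64() machinery in pi.h, together with relacheck's ABS64-to-PREL64 conversion below, stores place-relative 64-bit offsets instead of absolute pointers in .init.rodata.prel64, so the early code can dereference them before any relocation processing has run. A rough userspace sketch of the idea — in the kernel the offset comes from the converted relocation, here it is simply computed by hand:

#include <stdint.h>
#include <stdio.h>

typedef int64_t prel64_t;

static void *prel64_to_pointer(const prel64_t *offset)
{
	if (!*offset)
		return NULL;
	return (void *)((intptr_t)offset + *offset);	/* resolve relative to the field itself */
}

static int target = 42;

int main(void)
{
	/* place-relative "pointer" to target, stored as a signed offset */
	prel64_t rel = (intptr_t)&target - (intptr_t)&rel;

	printf("%d\n", *(int *)prel64_to_pointer(&rel));	/* prints 42 */
	return 0;
}
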
diff --git a/arch/arm64/kernel/pi/relacheck.c b/arch/arm64/kernel/pi/relacheck.c
new file mode 100644
index 000000000000..b0cd4d0d275b
--- /dev/null
+++ b/arch/arm64/kernel/pi/relacheck.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 - Google LLC
+ * Author: Ard Biesheuvel <ardb@google.com>
+ */
+
+#include <elf.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define HOST_ORDER ELFDATA2LSB
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define HOST_ORDER ELFDATA2MSB
+#endif
+
+static Elf64_Ehdr *ehdr;
+static Elf64_Shdr *shdr;
+static const char *strtab;
+static bool swap;
+
+static uint64_t swab_elfxword(uint64_t val)
+{
+ return swap ? __builtin_bswap64(val) : val;
+}
+
+static uint32_t swab_elfword(uint32_t val)
+{
+ return swap ? __builtin_bswap32(val) : val;
+}
+
+static uint16_t swab_elfhword(uint16_t val)
+{
+ return swap ? __builtin_bswap16(val) : val;
+}
+
+int main(int argc, char *argv[])
+{
+ struct stat stat;
+ int fd, ret;
+
+ if (argc < 3) {
+ fprintf(stderr, "file arguments missing\n");
+ exit(EXIT_FAILURE);
+ }
+
+ fd = open(argv[1], O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "failed to open %s\n", argv[1]);
+ exit(EXIT_FAILURE);
+ }
+
+ ret = fstat(fd, &stat);
+ if (ret < 0) {
+ fprintf(stderr, "failed to stat() %s\n", argv[1]);
+ exit(EXIT_FAILURE);
+ }
+
+ ehdr = mmap(0, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (ehdr == MAP_FAILED) {
+ fprintf(stderr, "failed to mmap() %s\n", argv[1]);
+ exit(EXIT_FAILURE);
+ }
+
+ swap = ehdr->e_ident[EI_DATA] != HOST_ORDER;
+ shdr = (void *)ehdr + swab_elfxword(ehdr->e_shoff);
+ strtab = (void *)ehdr +
+ swab_elfxword(shdr[swab_elfhword(ehdr->e_shstrndx)].sh_offset);
+
+ for (int i = 0; i < swab_elfhword(ehdr->e_shnum); i++) {
+ unsigned long info, flags;
+ bool prel64 = false;
+ Elf64_Rela *rela;
+ int numrela;
+
+ if (swab_elfword(shdr[i].sh_type) != SHT_RELA)
+ continue;
+
+ /* only consider RELA sections operating on data */
+ info = swab_elfword(shdr[i].sh_info);
+ flags = swab_elfxword(shdr[info].sh_flags);
+ if ((flags & (SHF_ALLOC | SHF_EXECINSTR)) != SHF_ALLOC)
+ continue;
+
+ /*
+ * We generally don't permit ABS64 relocations in the code that
+ * runs before relocation processing occurs. If statically
+ * initialized absolute symbol references are unavoidable, they
+ * may be emitted into a *.rodata.prel64 section and they will
+ * be converted to place-relative 64-bit references. This
+ * requires special handling in the referring code.
+ */
+ if (strstr(strtab + swab_elfword(shdr[info].sh_name),
+ ".rodata.prel64")) {
+ prel64 = true;
+ }
+
+ rela = (void *)ehdr + swab_elfxword(shdr[i].sh_offset);
+ numrela = swab_elfxword(shdr[i].sh_size) / sizeof(*rela);
+
+ for (int j = 0; j < numrela; j++) {
+ uint64_t info = swab_elfxword(rela[j].r_info);
+
+ if (ELF64_R_TYPE(info) != R_AARCH64_ABS64)
+ continue;
+
+ if (prel64) {
+ /* convert ABS64 into PREL64 */
+ info ^= R_AARCH64_ABS64 ^ R_AARCH64_PREL64;
+ rela[j].r_info = swab_elfxword(info);
+ } else {
+ fprintf(stderr,
+ "Unexpected absolute relocations detected in %s\n",
+ argv[2]);
+ close(fd);
+ unlink(argv[1]);
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+ close(fd);
+ return 0;
+}
diff --git a/arch/arm64/kernel/pi/relocate.c b/arch/arm64/kernel/pi/relocate.c
new file mode 100644
index 000000000000..2407d2696398
--- /dev/null
+++ b/arch/arm64/kernel/pi/relocate.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2023 Google LLC
+// Authors: Ard Biesheuvel <ardb@google.com>
+// Peter Collingbourne <pcc@google.com>
+
+#include <linux/elf.h>
+#include <linux/init.h>
+#include <linux/types.h>
+
+#include "pi.h"
+
+extern const Elf64_Rela rela_start[], rela_end[];
+extern const u64 relr_start[], relr_end[];
+
+void __init relocate_kernel(u64 offset)
+{
+ u64 *place = NULL;
+
+ for (const Elf64_Rela *rela = rela_start; rela < rela_end; rela++) {
+ if (ELF64_R_TYPE(rela->r_info) != R_AARCH64_RELATIVE)
+ continue;
+ *(u64 *)(rela->r_offset + offset) = rela->r_addend + offset;
+ }
+
+ if (!IS_ENABLED(CONFIG_RELR) || !offset)
+ return;
+
+ /*
+ * Apply RELR relocations.
+ *
+ * RELR is a compressed format for storing relative relocations. The
+ * encoded sequence of entries looks like:
+ * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
+ *
+ * i.e. start with an address, followed by any number of bitmaps. The
+ * address entry encodes 1 relocation. The subsequent bitmap entries
+ * encode up to 63 relocations each, at subsequent offsets following
+ * the last address entry.
+ *
+ * The bitmap entries must have 1 in the least significant bit. The
+ * assumption here is that an address cannot have 1 in lsb. Odd
+ * addresses are not supported. Any odd addresses are stored in the
+ * RELA section, which is handled above.
+ *
+ * With the exception of the least significant bit, each bit in the
+ * bitmap corresponds with a machine word that follows the base address
+ * word, and the bit value indicates whether or not a relocation needs
+ * to be applied to it. The second least significant bit represents the
+ * machine word immediately following the initial address, and each bit
+ * that follows represents the next word, in linear order. As such, a
+ * single bitmap can encode up to 63 relocations in a 64-bit object.
+ */
+ for (const u64 *relr = relr_start; relr < relr_end; relr++) {
+ if ((*relr & 1) == 0) {
+ place = (u64 *)(*relr + offset);
+ *place++ += offset;
+ } else {
+ for (u64 *p = place, r = *relr >> 1; r; p++, r >>= 1)
+ if (r & 1)
+ *p += offset;
+ place += 63;
+ }
+ }
+}
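
Note (not part of the patch): the RELR comment above is dense, so here is a standalone toy decoder, simplified in that the address entry is taken as a runtime address directly rather than being offset-adjusted first. One address entry plus one bitmap entry relocate image[0..3] and leave the rest alone:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t image[8] = { 100, 200, 300, 400, 500, 600, 700, 800 };
	uint64_t offset = 5;	/* pretend load displacement */

	/* bit 0 of 0xf is the bitmap marker; bits 1..3 flag image[1..3] */
	uint64_t relr[2] = { (uint64_t)(uintptr_t)&image[0], 0xf };

	uint64_t *place = NULL;

	for (int i = 0; i < 2; i++) {
		if ((relr[i] & 1) == 0) {
			place = (uint64_t *)(uintptr_t)relr[i];
			*place++ += offset;		/* address entry: one relocation */
		} else {
			uint64_t *p = place;

			for (uint64_t r = relr[i] >> 1; r; p++, r >>= 1)
				if (r & 1)
					*p += offset;
			place += 63;			/* a bitmap covers the next 63 words */
		}
	}

	printf("%llu %llu %llu %llu %llu\n",	/* prints: 105 205 305 405 500 */
	       (unsigned long long)image[0], (unsigned long long)image[1],
	       (unsigned long long)image[2], (unsigned long long)image[3],
	       (unsigned long long)image[4]);
	return 0;
}
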
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index ab43bfa85368..65a052bf741f 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -166,21 +166,6 @@ static void __init smp_build_mpidr_hash(void)
pr_warn("Large number of MPIDR hash buckets detected\n");
}
-static void *early_fdt_ptr __initdata;
-
-void __init *get_early_fdt_ptr(void)
-{
- return early_fdt_ptr;
-}
-
-asmlinkage void __init early_fdt_map(u64 dt_phys)
-{
- int fdt_size;
-
- early_fixmap_init();
- early_fdt_ptr = fixmap_remap_fdt(dt_phys, &fdt_size, PAGE_KERNEL);
-}
-
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
int size;
@@ -298,13 +283,6 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
kaslr_init();
- /*
- * If know now we are going to need KPTI then use non-global
- * mappings from the start, avoiding the cost of rewriting
- * everything later.
- */
- arm64_use_ng_mappings = kaslr_requires_kpti();
-
early_fixmap_init();
early_ioremap_init();
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 2aa5129d8253..f093cdf71be1 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -102,9 +102,6 @@ SYM_CODE_START(cpu_resume)
mov x0, xzr
bl init_kernel_el
mov x19, x0 // preserve boot mode
-#if VA_BITS > 48
- ldr_l x0, vabits_actual
-#endif
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 3cd7e76cc562..755a22d4f840 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -126,9 +126,9 @@ jiffies = jiffies_64;
#ifdef CONFIG_UNWIND_TABLES
#define UNWIND_DATA_SECTIONS \
.eh_frame : { \
- __eh_frame_start = .; \
+ __pi___eh_frame_start = .; \
*(.eh_frame) \
- __eh_frame_end = .; \
+ __pi___eh_frame_end = .; \
}
#else
#define UNWIND_DATA_SECTIONS
@@ -270,15 +270,15 @@ SECTIONS
HYPERVISOR_RELOC_SECTION
.rela.dyn : ALIGN(8) {
- __rela_start = .;
+ __pi_rela_start = .;
*(.rela .rela*)
- __rela_end = .;
+ __pi_rela_end = .;
}
.relr.dyn : ALIGN(8) {
- __relr_start = .;
+ __pi_relr_start = .;
*(.relr.dyn)
- __relr_end = .;
+ __pi_relr_end = .;
}
. = ALIGN(SEGMENT_ALIGN);
@@ -311,12 +311,17 @@ SECTIONS
__pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin);
_edata = .;
+ /* start of zero-init region */
BSS_SECTION(SBSS_ALIGN, 0, 0)
. = ALIGN(PAGE_SIZE);
init_pg_dir = .;
. += INIT_DIR_SIZE;
init_pg_end = .;
+ /* end of zero-init region */
+
+ . += SZ_4K; /* stack for the early C runtime */
+ early_init_stack = .;
. = ALIGN(SEGMENT_ALIGN);
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);