diff options
Diffstat (limited to 'arch/arm64/kernel')
70 files changed, 2547 insertions, 1330 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 763824963ed1..71c29a2a2f19 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -33,7 +33,8 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ - syscall.o proton-pack.o idle.o patching.o pi/ + syscall.o proton-pack.o idle.o patching.o pi/ \ + rsi.o obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ sys_compat.o @@ -46,7 +47,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o -obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o diff --git a/arch/arm64/kernel/Makefile.syscalls b/arch/arm64/kernel/Makefile.syscalls new file mode 100644 index 000000000000..0542a718871a --- /dev/null +++ b/arch/arm64/kernel/Makefile.syscalls @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 + +syscall_abis_32 += +syscall_abis_64 += renameat rlimit memfd_secret + +syscalltbl = arch/arm64/tools/syscall_%.tbl diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index dba8fcec7f33..e6f66491fbe9 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -26,9 +26,11 @@ #include <linux/libfdt.h> #include <linux/smp.h> #include <linux/serial_core.h> +#include <linux/suspend.h> #include <linux/pgtable.h> #include <acpi/ghes.h> +#include <acpi/processor.h> #include <asm/cputype.h> #include <asm/cpu_ops.h> #include <asm/daifflags.h> @@ -44,6 +46,7 @@ EXPORT_SYMBOL(acpi_pci_disabled); static bool param_acpi_off __initdata; static bool param_acpi_on __initdata; static bool param_acpi_force __initdata; +static bool param_acpi_nospcr __initdata; static int __init parse_acpi(char *arg) { @@ -57,6 +60,8 @@ static int __init parse_acpi(char *arg) param_acpi_on = true; else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */ param_acpi_force = true; + else if (strcmp(arg, "nospcr") == 0) /* disable SPCR as default console */ + param_acpi_nospcr = true; else return -EINVAL; /* Core will print when we return error */ @@ -227,7 +232,29 @@ done: if (earlycon_acpi_spcr_enable) early_init_dt_scan_chosen_stdout(); } else { - acpi_parse_spcr(earlycon_acpi_spcr_enable, true); +#ifdef CONFIG_HIBERNATION + struct acpi_table_header *facs = NULL; + acpi_get_table(ACPI_SIG_FACS, 1, &facs); + if (facs) { + swsusp_hardware_signature = + ((struct acpi_table_facs *)facs)->hardware_signature; + acpi_put_table(facs); + } +#endif + + /* + * For varying privacy and security reasons, sometimes need + * to completely silence the serial console output, and only + * enable it when needed. + * But there are many existing systems that depend on this + * behaviour, use acpi=nospcr to disable console in ACPI SPCR + * table as default serial console. + */ + acpi_parse_spcr(earlycon_acpi_spcr_enable, + !param_acpi_nospcr); + pr_info("Use ACPI SPCR as default console: %s\n", + param_acpi_nospcr ? "No" : "Yes"); + if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); } @@ -413,107 +440,23 @@ void arch_reserve_mem_area(acpi_physical_address addr, size_t size) memblock_mark_nomap(addr, size); } -#ifdef CONFIG_ACPI_FFH -/* - * Implements ARM64 specific callbacks to support ACPI FFH Operation Region as - * specified in https://developer.arm.com/docs/den0048/latest - */ -struct acpi_ffh_data { - struct acpi_ffh_info info; - void (*invoke_ffh_fn)(unsigned long a0, unsigned long a1, - unsigned long a2, unsigned long a3, - unsigned long a4, unsigned long a5, - unsigned long a6, unsigned long a7, - struct arm_smccc_res *args, - struct arm_smccc_quirk *res); - void (*invoke_ffh64_fn)(const struct arm_smccc_1_2_regs *args, - struct arm_smccc_1_2_regs *res); -}; - -int acpi_ffh_address_space_arch_setup(void *handler_ctxt, void **region_ctxt) +#ifdef CONFIG_ACPI_HOTPLUG_CPU +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 apci_id, + int *pcpu) { - enum arm_smccc_conduit conduit; - struct acpi_ffh_data *ffh_ctxt; - - if (arm_smccc_get_version() < ARM_SMCCC_VERSION_1_2) - return -EOPNOTSUPP; - - conduit = arm_smccc_1_1_get_conduit(); - if (conduit == SMCCC_CONDUIT_NONE) { - pr_err("%s: invalid SMCCC conduit\n", __func__); - return -EOPNOTSUPP; + /* If an error code is passed in this stub can't fix it */ + if (*pcpu < 0) { + pr_warn_once("Unable to map CPU to valid ID\n"); + return *pcpu; } - ffh_ctxt = kzalloc(sizeof(*ffh_ctxt), GFP_KERNEL); - if (!ffh_ctxt) - return -ENOMEM; - - if (conduit == SMCCC_CONDUIT_SMC) { - ffh_ctxt->invoke_ffh_fn = __arm_smccc_smc; - ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_smc; - } else { - ffh_ctxt->invoke_ffh_fn = __arm_smccc_hvc; - ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_hvc; - } - - memcpy(ffh_ctxt, handler_ctxt, sizeof(ffh_ctxt->info)); - - *region_ctxt = ffh_ctxt; - return AE_OK; -} - -static bool acpi_ffh_smccc_owner_allowed(u32 fid) -{ - int owner = ARM_SMCCC_OWNER_NUM(fid); - - if (owner == ARM_SMCCC_OWNER_STANDARD || - owner == ARM_SMCCC_OWNER_SIP || owner == ARM_SMCCC_OWNER_OEM) - return true; - - return false; + return 0; } +EXPORT_SYMBOL(acpi_map_cpu); -int acpi_ffh_address_space_arch_handler(acpi_integer *value, void *region_context) +int acpi_unmap_cpu(int cpu) { - int ret = 0; - struct acpi_ffh_data *ffh_ctxt = region_context; - - if (ffh_ctxt->info.offset == 0) { - /* SMC/HVC 32bit call */ - struct arm_smccc_res res; - u32 a[8] = { 0 }, *ptr = (u32 *)value; - - if (!ARM_SMCCC_IS_FAST_CALL(*ptr) || ARM_SMCCC_IS_64(*ptr) || - !acpi_ffh_smccc_owner_allowed(*ptr) || - ffh_ctxt->info.length > 32) { - ret = AE_ERROR; - } else { - int idx, len = ffh_ctxt->info.length >> 2; - - for (idx = 0; idx < len; idx++) - a[idx] = *(ptr + idx); - - ffh_ctxt->invoke_ffh_fn(a[0], a[1], a[2], a[3], a[4], - a[5], a[6], a[7], &res, NULL); - memcpy(value, &res, sizeof(res)); - } - - } else if (ffh_ctxt->info.offset == 1) { - /* SMC/HVC 64bit call */ - struct arm_smccc_1_2_regs *r = (struct arm_smccc_1_2_regs *)value; - - if (!ARM_SMCCC_IS_FAST_CALL(r->a0) || !ARM_SMCCC_IS_64(r->a0) || - !acpi_ffh_smccc_owner_allowed(r->a0) || - ffh_ctxt->info.length > sizeof(*r)) { - ret = AE_ERROR; - } else { - ffh_ctxt->invoke_ffh64_fn(r, r); - memcpy(value, r, ffh_ctxt->info.length); - } - } else { - ret = AE_ERROR; - } - - return ret; + return 0; } -#endif /* CONFIG_ACPI_FFH */ +EXPORT_SYMBOL(acpi_unmap_cpu); +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index e51535a5f939..2465f291c7e1 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -27,24 +27,13 @@ #include <asm/numa.h> -static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; int __init acpi_numa_get_nid(unsigned int cpu) { return acpi_early_node_map[cpu]; } -static inline int get_cpu_for_acpi_id(u32 uid) -{ - int cpu; - - for (cpu = 0; cpu < nr_cpu_ids; cpu++) - if (uid == get_acpi_id_for_cpu(cpu)) - return cpu; - - return -EINVAL; -} - static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header, const unsigned long end) { diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index dd6ce86d4332..e737c6295ec7 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -462,6 +462,9 @@ static int run_all_insn_set_hw_mode(unsigned int cpu) for (int i = 0; i < ARRAY_SIZE(insn_emulations); i++) { struct insn_emulation *insn = insn_emulations[i]; bool enable = READ_ONCE(insn->current_mode) == INSN_HW; + if (insn->status == INSN_UNAVAILABLE) + continue; + if (insn->set_hw_mode && insn->set_hw_mode(enable)) { pr_warn("CPU[%u] cannot support the emulation of %s", cpu, insn->name); @@ -504,7 +507,7 @@ static int update_insn_emulation_mode(struct insn_emulation *insn, return ret; } -static int emulation_proc_handler(struct ctl_table *table, int write, +static int emulation_proc_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 81496083c041..eb1a840e4110 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -12,15 +12,12 @@ #include <linux/ftrace.h> #include <linux/kexec.h> #include <linux/mm.h> -#include <linux/dma-mapping.h> #include <linux/kvm_host.h> -#include <linux/preempt.h> #include <linux/suspend.h> #include <asm/cpufeature.h> #include <asm/fixmap.h> #include <asm/thread_info.h> #include <asm/memory.h> -#include <asm/signal32.h> #include <asm/smp_plat.h> #include <asm/suspend.h> #include <linux/kbuild.h> @@ -28,8 +25,6 @@ int main(void) { - DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); - BLANK(); DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu)); DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); @@ -79,45 +74,27 @@ int main(void) DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1)); - DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save)); + DEFINE(S_PMR, offsetof(struct pt_regs, pmr)); DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); + DEFINE(S_STACKFRAME_TYPE, offsetof(struct pt_regs, stackframe.type)); DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); BLANK(); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS - DEFINE(FREGS_X0, offsetof(struct ftrace_regs, regs[0])); - DEFINE(FREGS_X2, offsetof(struct ftrace_regs, regs[2])); - DEFINE(FREGS_X4, offsetof(struct ftrace_regs, regs[4])); - DEFINE(FREGS_X6, offsetof(struct ftrace_regs, regs[6])); - DEFINE(FREGS_X8, offsetof(struct ftrace_regs, regs[8])); - DEFINE(FREGS_FP, offsetof(struct ftrace_regs, fp)); - DEFINE(FREGS_LR, offsetof(struct ftrace_regs, lr)); - DEFINE(FREGS_SP, offsetof(struct ftrace_regs, sp)); - DEFINE(FREGS_PC, offsetof(struct ftrace_regs, pc)); + DEFINE(FREGS_X0, offsetof(struct __arch_ftrace_regs, regs[0])); + DEFINE(FREGS_X2, offsetof(struct __arch_ftrace_regs, regs[2])); + DEFINE(FREGS_X4, offsetof(struct __arch_ftrace_regs, regs[4])); + DEFINE(FREGS_X6, offsetof(struct __arch_ftrace_regs, regs[6])); + DEFINE(FREGS_X8, offsetof(struct __arch_ftrace_regs, regs[8])); + DEFINE(FREGS_FP, offsetof(struct __arch_ftrace_regs, fp)); + DEFINE(FREGS_LR, offsetof(struct __arch_ftrace_regs, lr)); + DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp)); + DEFINE(FREGS_PC, offsetof(struct __arch_ftrace_regs, pc)); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS - DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct ftrace_regs, direct_tramp)); -#endif - DEFINE(FREGS_SIZE, sizeof(struct ftrace_regs)); - BLANK(); + DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct __arch_ftrace_regs, direct_tramp)); #endif -#ifdef CONFIG_COMPAT - DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0)); - DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0)); + DEFINE(FREGS_SIZE, sizeof(struct __arch_ftrace_regs)); BLANK(); #endif - DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); - BLANK(); - DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); - DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags)); - BLANK(); - DEFINE(VM_EXEC, VM_EXEC); - BLANK(); - DEFINE(PAGE_SZ, PAGE_SIZE); - BLANK(); - DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); - DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); - BLANK(); - DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET); - BLANK(); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK(); DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val)); @@ -128,6 +105,7 @@ int main(void) DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs)); + DEFINE(CPU_ELR_EL2, offsetof(struct kvm_cpu_context, sys_regs[ELR_EL2])); DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1])); DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1])); DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1])); @@ -145,6 +123,7 @@ int main(void) DEFINE(NVHE_INIT_HCR_EL2, offsetof(struct kvm_nvhe_init_params, hcr_el2)); DEFINE(NVHE_INIT_VTTBR, offsetof(struct kvm_nvhe_init_params, vttbr)); DEFINE(NVHE_INIT_VTCR, offsetof(struct kvm_nvhe_init_params, vtcr)); + DEFINE(NVHE_INIT_TMP, offsetof(struct kvm_nvhe_init_params, tmp)); #endif #ifdef CONFIG_CPU_PM DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); @@ -200,18 +179,6 @@ int main(void) DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func)); #endif BLANK(); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - DEFINE(FGRET_REGS_X0, offsetof(struct fgraph_ret_regs, regs[0])); - DEFINE(FGRET_REGS_X1, offsetof(struct fgraph_ret_regs, regs[1])); - DEFINE(FGRET_REGS_X2, offsetof(struct fgraph_ret_regs, regs[2])); - DEFINE(FGRET_REGS_X3, offsetof(struct fgraph_ret_regs, regs[3])); - DEFINE(FGRET_REGS_X4, offsetof(struct fgraph_ret_regs, regs[4])); - DEFINE(FGRET_REGS_X5, offsetof(struct fgraph_ret_regs, regs[5])); - DEFINE(FGRET_REGS_X6, offsetof(struct fgraph_ret_regs, regs[6])); - DEFINE(FGRET_REGS_X7, offsetof(struct fgraph_ret_regs, regs[7])); - DEFINE(FGRET_REGS_FP, offsetof(struct fgraph_ret_regs, fp)); - DEFINE(FGRET_REGS_SIZE, sizeof(struct fgraph_ret_regs)); -#endif #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call)); #endif diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index d9c9218fa1fd..309942b06c5b 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -101,16 +101,18 @@ int populate_cache_leaves(unsigned int cpu) unsigned int level, idx; enum cache_type type; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - struct cacheinfo *this_leaf = this_cpu_ci->info_list; + struct cacheinfo *infos = this_cpu_ci->info_list; for (idx = 0, level = 1; level <= this_cpu_ci->num_levels && - idx < this_cpu_ci->num_leaves; idx++, level++) { + idx < this_cpu_ci->num_leaves; level++) { type = get_cache_type(level); if (type == CACHE_TYPE_SEPARATE) { - ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); - ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + if (idx + 1 >= this_cpu_ci->num_leaves) + break; + ci_leaf_init(&infos[idx++], CACHE_TYPE_DATA, level); + ci_leaf_init(&infos[idx++], CACHE_TYPE_INST, level); } else { - ci_leaf_init(this_leaf++, type, level); + ci_leaf_init(&infos[idx++], type, level); } } return 0; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 76b8dd37092a..7ce555862895 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -432,6 +432,41 @@ static const struct midr_range erratum_spec_unpriv_load_list[] = { }; #endif +#ifdef CONFIG_ARM64_ERRATUM_3194386 +static const struct midr_range erratum_spec_ssbs_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N3), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), + {} +}; +#endif + +#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38 +static const struct midr_range erratum_ac03_cpu_38_list[] = { + MIDR_ALL_VERSIONS(MIDR_AMPERE1), + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -729,6 +764,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_3194386 + { + .desc = "SSBS not fully self-synchronizing", + .capability = ARM64_WORKAROUND_SPECULATIVE_SSBS, + ERRATA_MIDR_RANGE_LIST(erratum_spec_ssbs_list), + }, +#endif #ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD { .desc = "ARM errata 2966298, 3117295", @@ -741,9 +783,17 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "AmpereOne erratum AC03_CPU_38", .capability = ARM64_WORKAROUND_AMPERE_AC03_CPU_38, - ERRATA_MIDR_ALL_VERSIONS(MIDR_AMPERE1), + ERRATA_MIDR_RANGE_LIST(erratum_ac03_cpu_38_list), }, #endif { + .desc = "Broken CNTVOFF_EL2", + .capability = ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF, + ERRATA_MIDR_RANGE_LIST(((const struct midr_range[]) { + MIDR_ALL_VERSIONS(MIDR_QCOM_ORYON_X1), + {} + })), + }, + { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 56583677c1f2..d561cf3b8ac7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -75,6 +75,7 @@ #include <linux/cpu.h> #include <linux/kasan.h> #include <linux/percpu.h> +#include <linux/sched/isolation.h> #include <asm/cpu.h> #include <asm/cpufeature.h> @@ -103,6 +104,7 @@ static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; +unsigned int compat_elf_hwcap3 __read_mostly; #endif DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS); @@ -228,6 +230,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_XS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0), @@ -266,6 +269,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar3[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FPRCVT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FAMINMAX_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -285,12 +289,14 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_FP_SHIFT, 4, ID_AA64PFR0_EL1_FP_NI), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_EL1_IMP), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_EL0_IMP), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_GCS), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_GCS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0), @@ -314,6 +320,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F16MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, 0), @@ -326,6 +334,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_EltPerm_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_AES_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, 0), @@ -369,6 +379,16 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP4_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP2_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SBitPerm_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_AES_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SFEXPA_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_STMOP_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMOP4_SHIFT, 1, 0), ARM64_FTR_END, }; @@ -377,6 +397,8 @@ static const struct arm64_ftr_bits ftr_id_aa64fpfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8FMA_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP4_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP2_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8MM8_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8MM4_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E4M3_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E5M2_SHIFT, 1, 0), ARM64_FTR_END, @@ -429,6 +451,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ECBHB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0), @@ -465,6 +488,8 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = { + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_POE), + FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1POE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0), ARM64_FTR_END, @@ -681,6 +706,14 @@ static const struct arm64_ftr_bits ftr_id_dfr1[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_mpamidr[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_PMG_MAX_SHIFT, MPAMIDR_EL1_PMG_MAX_WIDTH, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_VPMR_MAX_SHIFT, MPAMIDR_EL1_VPMR_MAX_WIDTH, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_HAS_HCR_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_PARTID_MAX_SHIFT, MPAMIDR_EL1_PARTID_MAX_WIDTH, 0), + ARM64_FTR_END, +}; + /* * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 4bit feature fields and a default safe value of @@ -801,6 +834,9 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3), ARM64_FTR_REG(SYS_ID_AA64MMFR4_EL1, ftr_id_aa64mmfr4), + /* Op1 = 0, CRn = 10, CRm = 4 */ + ARM64_FTR_REG(SYS_MPAMIDR_EL1, ftr_mpamidr), + /* Op1 = 1, CRn = 0, CRm = 0 */ ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid), @@ -986,17 +1022,16 @@ static void init_cpu_ftr_reg(u32 sys_reg, u64 new) /* Override was valid */ ftr_new = tmp; str = "forced"; - } else if (ftr_ovr == tmp) { + } else { /* Override was the safe value */ str = "already set"; } - if (str) - pr_warn("%s[%d:%d]: %s to %llx\n", - reg->name, - ftrp->shift + ftrp->width - 1, - ftrp->shift, str, - tmp & (BIT(ftrp->width) - 1)); + pr_warn("%s[%d:%d]: %s to %llx\n", + reg->name, + ftrp->shift + ftrp->width - 1, + ftrp->shift, str, + tmp & (BIT(ftrp->width) - 1)); } else if ((ftr_mask & reg->override->val) == ftr_mask) { reg->override->val &= ~ftr_mask; pr_warn("%s[%d:%d]: impossible override, ignored\n", @@ -1149,17 +1184,14 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) { unsigned long cpacr = cpacr_save_enable_kernel_sme(); - /* - * We mask out SMPS since even if the hardware - * supports priorities the kernel does not at present - * and we block access to them. - */ - info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS; vec_init_vq_map(ARM64_VEC_SME); cpacr_restore(cpacr); } + if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) + init_cpu_ftr_reg(SYS_MPAMIDR_EL1, info->reg_mpamidr); + if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); } @@ -1402,13 +1434,6 @@ void update_cpu_features(int cpu, id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) { unsigned long cpacr = cpacr_save_enable_kernel_sme(); - /* - * We mask out SMPS since even if the hardware - * supports priorities the kernel does not at present - * and we block access to them. - */ - info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS; - /* Probe vector lengths */ if (!system_capabilities_finalized()) vec_update_vq_map(ARM64_VEC_SME); @@ -1416,6 +1441,11 @@ void update_cpu_features(int cpu, cpacr_restore(cpacr); } + if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) { + taint |= check_update_ftr_reg(SYS_MPAMIDR_EL1, cpu, + info->reg_mpamidr, boot->reg_mpamidr); + } + /* * The kernel uses the LDGM/STGM instructions and the number of tags * they read/write depends on the GMID_EL1.BS field. Check that the @@ -1615,6 +1645,11 @@ const struct cpumask *system_32bit_el0_cpumask(void) return cpu_possible_mask; } +const struct cpumask *task_cpu_fallback_mask(struct task_struct *p) +{ + return __task_cpu_possible_mask(p, housekeeping_cpumask(HK_TYPE_TICK)); +} + static int __init parse_32bit_el0_param(char *str) { allow_mismatched_32bit_el0 = true; @@ -2307,6 +2342,14 @@ static void user_feature_fixup(void) if (regp) regp->user_mask &= ~ID_AA64ISAR1_EL1_BF16_MASK; } + + if (cpus_have_cap(ARM64_WORKAROUND_SPECULATIVE_SSBS)) { + struct arm64_ftr_reg *regp; + + regp = get_arm64_ftr_reg(SYS_ID_AA64PFR1_EL1); + if (regp) + regp->user_mask &= ~ID_AA64PFR1_EL1_SSBS_MASK; + } } static void elf_hwcap_fixup(void) @@ -2339,6 +2382,22 @@ static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused) sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn); } +#ifdef CONFIG_ARM64_POE +static void cpu_enable_poe(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(REG_TCR2_EL1, 0, TCR2_EL1_E0POE); + sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_E0POE); +} +#endif + +#ifdef CONFIG_ARM64_GCS +static void cpu_enable_gcs(const struct arm64_cpu_capabilities *__unused) +{ + /* GCSPR_EL0 is always readable */ + write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1); +} +#endif + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -2358,6 +2417,36 @@ cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap) return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT); } +static bool +test_has_mpam(const struct arm64_cpu_capabilities *entry, int scope) +{ + if (!has_cpuid_feature(entry, scope)) + return false; + + /* Check firmware actually enabled MPAM on this cpu. */ + return (read_sysreg_s(SYS_MPAM1_EL1) & MPAM1_EL1_MPAMEN); +} + +static void +cpu_enable_mpam(const struct arm64_cpu_capabilities *entry) +{ + /* + * Access by the kernel (at EL1) should use the reserved PARTID + * which is configured unrestricted. This avoids priority-inversion + * where latency sensitive tasks have to wait for a task that has + * been throttled to release the lock. + */ + write_sysreg_s(0, SYS_MPAM1_EL1); +} + +static bool +test_has_mpam_hcr(const struct arm64_cpu_capabilities *entry, int scope) +{ + u64 idr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1); + + return idr & MPAMIDR_EL1_HAS_HCR; +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .capability = ARM64_ALWAYS_BOOT, @@ -2572,6 +2661,21 @@ static const struct arm64_cpu_capabilities arm64_features[] = { ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM) }, #endif +#ifdef CONFIG_ARM64_HAFT + { + .desc = "Hardware managed Access Flag for Table Descriptors", + /* + * Contrary to the page/block access flag, the table access flag + * cannot be emulated in software (no access fault will occur). + * Therefore this should be used only if it's supported system + * wide. + */ + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_HAFT, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, HAFT) + }, +#endif { .desc = "CRC32 instructions", .capability = ARM64_HAS_CRC32, @@ -2855,12 +2959,46 @@ static const struct arm64_cpu_capabilities arm64_features[] = { }, #endif { + .desc = "Memory Partitioning And Monitoring", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_MPAM, + .matches = test_has_mpam, + .cpu_enable = cpu_enable_mpam, + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, MPAM, 1) + }, + { + .desc = "Memory Partitioning And Monitoring Virtualisation", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_MPAM_HCR, + .matches = test_has_mpam_hcr, + }, + { .desc = "NV1", .capability = ARM64_HAS_HCR_NV1, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_nv1, ARM64_CPUID_FIELDS_NEG(ID_AA64MMFR4_EL1, E2H0, NI_NV1) }, +#ifdef CONFIG_ARM64_POE + { + .desc = "Stage-1 Permission Overlay Extension (S1POE)", + .capability = ARM64_HAS_S1POE, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + .cpu_enable = cpu_enable_poe, + ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1POE, IMP) + }, +#endif +#ifdef CONFIG_ARM64_GCS + { + .desc = "Guarded Control Stack (GCS)", + .capability = ARM64_HAS_GCS, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .cpu_enable = cpu_enable_gcs, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, GCS, IMP) + }, +#endif {}, }; @@ -2893,6 +3031,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = match, \ } +#define HWCAP_CAP_MATCH_ID(match, reg, field, min_value, cap_type, cap) \ + { \ + __HWCAP_CAP(#cap, cap_type, cap) \ + HWCAP_CPUID_MATCH(reg, field, min_value) \ + .matches = match, \ + } + #ifdef CONFIG_ARM64_PTR_AUTH static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { @@ -2921,6 +3066,13 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { }; #endif +#ifdef CONFIG_ARM64_SVE +static bool has_sve_feature(const struct arm64_cpu_capabilities *cap, int scope) +{ + return system_supports_sve() && has_user_cpuid_feature(cap, scope); +} +#endif + static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL), HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES), @@ -2939,6 +3091,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM, CAP_HWCAP, KERNEL_HWCAP_FLAGM), HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), HWCAP_CAP(ID_AA64ISAR0_EL1, RNDR, IMP, CAP_HWCAP, KERNEL_HWCAP_RNG), + HWCAP_CAP(ID_AA64ISAR3_EL1, FPRCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_FPRCVT), HWCAP_CAP(ID_AA64PFR0_EL1, FP, IMP, CAP_HWCAP, KERNEL_HWCAP_FP), HWCAP_CAP(ID_AA64PFR0_EL1, FP, FP16, CAP_HWCAP, KERNEL_HWCAP_FPHP), HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMD), @@ -2963,19 +3116,27 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64MMFR2_EL1, AT, IMP, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(ID_AA64PFR0_EL1, SVE, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE), - HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2p1, CAP_HWCAP, KERNEL_HWCAP_SVE2P1), - HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), - HWCAP_CAP(ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES), - HWCAP_CAP(ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), - HWCAP_CAP(ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), - HWCAP_CAP(ID_AA64ZFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_B16B16), - HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), - HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16), - HWCAP_CAP(ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), - HWCAP_CAP(ID_AA64ZFR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4), - HWCAP_CAP(ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), - HWCAP_CAP(ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), - HWCAP_CAP(ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2p2, CAP_HWCAP, KERNEL_HWCAP_SVE2P2), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2p1, CAP_HWCAP, KERNEL_HWCAP_SVE2P1), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, AES2, CAP_HWCAP, KERNEL_HWCAP_SVE_AES2), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_B16B16), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, B16B16, BFSCALE, CAP_HWCAP, KERNEL_HWCAP_SVE_BFSCALE), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F16MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_F16MM), + HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, EltPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_ELTPERM), +#endif +#ifdef CONFIG_ARM64_GCS + HWCAP_CAP(ID_AA64PFR1_EL1, GCS, IMP, CAP_HWCAP, KERNEL_HWCAP_GCS), #endif HWCAP_CAP(ID_AA64PFR1_EL1, SSBS, SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_BTI @@ -2992,6 +3153,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64MMFR0_EL1, ECV, IMP, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(ID_AA64MMFR1_EL1, AFP, IMP, CAP_HWCAP, KERNEL_HWCAP_AFP), HWCAP_CAP(ID_AA64ISAR2_EL1, CSSC, IMP, CAP_HWCAP, KERNEL_HWCAP_CSSC), + HWCAP_CAP(ID_AA64ISAR2_EL1, CSSC, CMPBR, CAP_HWCAP, KERNEL_HWCAP_CMPBR), HWCAP_CAP(ID_AA64ISAR2_EL1, RPRFM, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRFM), HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES), HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT), @@ -3001,6 +3163,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), + HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2), HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), @@ -3018,13 +3181,23 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), + HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), + HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), + HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), + HWCAP_CAP(ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP), + HWCAP_CAP(ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4), #endif /* CONFIG_ARM64_SME */ HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT), HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA), HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP4), HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8MM8, IMP, CAP_HWCAP, KERNEL_HWCAP_F8MM8), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8MM4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8MM4), HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3), HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2), +#ifdef CONFIG_ARM64_POE + HWCAP_CAP(ID_AA64MMFR3_EL1, S1POE, IMP, CAP_HWCAP, KERNEL_HWCAP_POE), +#endif {}, }; @@ -3343,7 +3516,7 @@ static void verify_hyp_capabilities(void) return; safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); - mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); /* Verify VMID bits */ @@ -3364,6 +3537,36 @@ static void verify_hyp_capabilities(void) } } +static void verify_mpam_capabilities(void) +{ + u64 cpu_idr = read_cpuid(ID_AA64PFR0_EL1); + u64 sys_idr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + u16 cpu_partid_max, cpu_pmg_max, sys_partid_max, sys_pmg_max; + + if (FIELD_GET(ID_AA64PFR0_EL1_MPAM_MASK, cpu_idr) != + FIELD_GET(ID_AA64PFR0_EL1_MPAM_MASK, sys_idr)) { + pr_crit("CPU%d: MPAM version mismatch\n", smp_processor_id()); + cpu_die_early(); + } + + cpu_idr = read_cpuid(MPAMIDR_EL1); + sys_idr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1); + if (FIELD_GET(MPAMIDR_EL1_HAS_HCR, cpu_idr) != + FIELD_GET(MPAMIDR_EL1_HAS_HCR, sys_idr)) { + pr_crit("CPU%d: Missing MPAM HCR\n", smp_processor_id()); + cpu_die_early(); + } + + cpu_partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX, cpu_idr); + cpu_pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX, cpu_idr); + sys_partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX, sys_idr); + sys_pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX, sys_idr); + if (cpu_partid_max < sys_partid_max || cpu_pmg_max < sys_pmg_max) { + pr_crit("CPU%d: MPAM PARTID/PMG max values are mismatched\n", smp_processor_id()); + cpu_die_early(); + } +} + /* * Run through the enabled system capabilities and enable() it on this CPU. * The capabilities were decided based on the available CPUs at the boot time. @@ -3390,6 +3593,9 @@ static void verify_local_cpu_capabilities(void) if (is_hyp_mode_available()) verify_hyp_capabilities(); + + if (system_supports_mpam()) + verify_mpam_capabilities(); } void check_local_cpu_capabilities(void) @@ -3467,6 +3673,11 @@ unsigned long cpu_get_elf_hwcap2(void) return elf_hwcap[1]; } +unsigned long cpu_get_elf_hwcap3(void) +{ + return elf_hwcap[2]; +} + static void __init setup_boot_cpu_capabilities(void) { /* @@ -3569,7 +3780,14 @@ static int enable_mismatched_32bit_el0(unsigned int cpu) static int lucky_winner = -1; struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); - bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0); + bool cpu_32bit = false; + + if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { + if (!housekeeping_cpu(cpu, HK_TYPE_TICK)) + pr_info("Treating adaptive-ticks CPU %u as 64-bit only\n", cpu); + else + cpu_32bit = true; + } if (cpu_32bit) { cpumask_set_cpu(cpu, cpu_32bit_el0_mask); diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c deleted file mode 100644 index f372295207fb..000000000000 --- a/arch/arm64/kernel/cpuidle.c +++ /dev/null @@ -1,74 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * ARM64 CPU idle arch support - * - * Copyright (C) 2014 ARM Ltd. - * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> - */ - -#include <linux/acpi.h> -#include <linux/cpuidle.h> -#include <linux/cpu_pm.h> -#include <linux/psci.h> - -#ifdef CONFIG_ACPI_PROCESSOR_IDLE - -#include <acpi/processor.h> - -#define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags)) - -static int psci_acpi_cpu_init_idle(unsigned int cpu) -{ - int i, count; - struct acpi_lpi_state *lpi; - struct acpi_processor *pr = per_cpu(processors, cpu); - - if (unlikely(!pr || !pr->flags.has_lpi)) - return -EINVAL; - - /* - * If the PSCI cpu_suspend function hook has not been initialized - * idle states must not be enabled, so bail out - */ - if (!psci_ops.cpu_suspend) - return -EOPNOTSUPP; - - count = pr->power.count - 1; - if (count <= 0) - return -ENODEV; - - for (i = 0; i < count; i++) { - u32 state; - - lpi = &pr->power.lpi_states[i + 1]; - /* - * Only bits[31:0] represent a PSCI power_state while - * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification - */ - state = lpi->address; - if (!psci_power_state_is_valid(state)) { - pr_warn("Invalid PSCI power state %#x\n", state); - return -EINVAL; - } - } - - return 0; -} - -int acpi_processor_ffh_lpi_probe(unsigned int cpu) -{ - return psci_acpi_cpu_init_idle(cpu); -} - -__cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) -{ - u32 state = lpi->address; - - if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags)) - return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM_RCU(psci_cpu_suspend_enter, - lpi->index, state); - else - return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter, - lpi->index, state); -} -#endif diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 09eeaa24d456..285d7d538342 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -80,6 +80,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SB] = "sb", [KERNEL_HWCAP_PACA] = "paca", [KERNEL_HWCAP_PACG] = "pacg", + [KERNEL_HWCAP_GCS] = "gcs", [KERNEL_HWCAP_DCPODP] = "dcpodp", [KERNEL_HWCAP_SVE2] = "sve2", [KERNEL_HWCAP_SVEAES] = "sveaes", @@ -143,6 +144,22 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SME_SF8FMA] = "smesf8fma", [KERNEL_HWCAP_SME_SF8DP4] = "smesf8dp4", [KERNEL_HWCAP_SME_SF8DP2] = "smesf8dp2", + [KERNEL_HWCAP_POE] = "poe", + [KERNEL_HWCAP_CMPBR] = "cmpbr", + [KERNEL_HWCAP_FPRCVT] = "fprcvt", + [KERNEL_HWCAP_F8MM8] = "f8mm8", + [KERNEL_HWCAP_F8MM4] = "f8mm4", + [KERNEL_HWCAP_SVE_F16MM] = "svef16mm", + [KERNEL_HWCAP_SVE_ELTPERM] = "sveeltperm", + [KERNEL_HWCAP_SVE_AES2] = "sveaes2", + [KERNEL_HWCAP_SVE_BFSCALE] = "svebfscale", + [KERNEL_HWCAP_SVE2P2] = "sve2p2", + [KERNEL_HWCAP_SME2P2] = "sme2p2", + [KERNEL_HWCAP_SME_SBITPERM] = "smesbitperm", + [KERNEL_HWCAP_SME_AES] = "smeaes", + [KERNEL_HWCAP_SME_SFEXPA] = "smesfexpa", + [KERNEL_HWCAP_SME_STMOP] = "smestmop", + [KERNEL_HWCAP_SME_SMOP4] = "smesmop4", }; #ifdef CONFIG_COMPAT @@ -280,7 +297,7 @@ const struct seq_operations cpuinfo_op = { }; -static struct kobj_type cpuregs_kobj_type = { +static const struct kobj_type cpuregs_kobj_type = { .sysfs_ops = &kobj_sysfs_ops, }; @@ -477,6 +494,19 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) __cpuinfo_store_cpu_32bit(&info->aarch32); + if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) + info->reg_mpamidr = read_cpuid(MPAMIDR_EL1); + + if (IS_ENABLED(CONFIG_ARM64_SME) && + id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { + /* + * We mask out SMPS since even if the hardware + * supports priorities the kernel does not at present + * and we block access to them. + */ + info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS; + } + cpuinfo_detect_icache_policy(info); } diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 64f2ecbdfe5c..58f047de3e1c 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -303,7 +303,6 @@ static int call_break_hook(struct pt_regs *regs, unsigned long esr) { struct break_hook *hook; struct list_head *list; - int (*fn)(struct pt_regs *regs, unsigned long esr) = NULL; list = user_mode(regs) ? &user_break_hook : &kernel_break_hook; @@ -312,13 +311,11 @@ static int call_break_hook(struct pt_regs *regs, unsigned long esr) * entirely not preemptible, and we can use rcu list safely here. */ list_for_each_entry_rcu(hook, list, node) { - unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; - - if ((comment & ~hook->mask) == hook->imm) - fn = hook->fn; + if ((esr_brk_comment(esr) & ~hook->mask) == hook->imm) + return hook->fn(regs, esr); } - return fn ? fn(regs, esr) : DBG_HOOK_ERROR; + return DBG_HOOK_ERROR; } NOKPROBE_SYMBOL(call_break_hook); @@ -443,6 +440,11 @@ void kernel_rewind_single_step(struct pt_regs *regs) set_regs_spsr_ss(regs); } +void kernel_fastforward_single_step(struct pt_regs *regs) +{ + clear_regs_spsr_ss(regs); +} + /* ptrace API */ void user_enable_single_step(struct task_struct *task) { diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 9afcc690fe73..1d25d8899dbf 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -9,7 +9,9 @@ #include <linux/efi.h> #include <linux/init.h> +#include <linux/kmemleak.h> #include <linux/screen_info.h> +#include <linux/vmalloc.h> #include <asm/efi.h> #include <asm/stacktrace.h> @@ -32,8 +34,16 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) u64 attr = md->attribute; u32 type = md->type; - if (type == EFI_MEMORY_MAPPED_IO) - return PROT_DEVICE_nGnRE; + if (type == EFI_MEMORY_MAPPED_IO) { + pgprot_t prot = __pgprot(PROT_DEVICE_nGnRE); + + if (arm64_is_protected_mmio(md->phys_addr, + md->num_pages << EFI_PAGE_SHIFT)) + prot = pgprot_encrypted(prot); + else + prot = pgprot_decrypted(prot); + return pgprot_val(prot); + } if (region_is_misaligned(md)) { static bool __initdata code_is_misaligned; @@ -212,6 +222,7 @@ l: if (!p) { return -ENOMEM; } + kmemleak_not_leak(p); efi_rt_stack_top = p + THREAD_SIZE; return 0; } diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index b77a15955f28..b260ddc4d3e9 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -103,7 +103,7 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs) static __always_inline void __enter_from_user_mode(void) { lockdep_hardirqs_off(CALLER_ADDR0); - CT_WARN_ON(ct_state() != CONTEXT_USER); + CT_WARN_ON(ct_state() != CT_STATE_USER); user_exit_irqoff(); trace_hardirqs_off_finish(); mte_disable_tco_entry(current); @@ -463,6 +463,24 @@ static void noinstr el1_bti(struct pt_regs *regs, unsigned long esr) exit_to_kernel_mode(regs); } +static void noinstr el1_gcs(struct pt_regs *regs, unsigned long esr) +{ + enter_from_kernel_mode(regs); + local_daif_inherit(regs); + do_el1_gcs(regs, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); +} + +static void noinstr el1_mops(struct pt_regs *regs, unsigned long esr) +{ + enter_from_kernel_mode(regs); + local_daif_inherit(regs); + do_el1_mops(regs, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); +} + static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -505,6 +523,12 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_BTI: el1_bti(regs, esr); break; + case ESR_ELx_EC_GCS: + el1_gcs(regs, esr); + break; + case ESR_ELx_EC_MOPS: + el1_mops(regs, esr); + break; case ESR_ELx_EC_BREAKPT_CUR: case ESR_ELx_EC_SOFTSTP_CUR: case ESR_ELx_EC_WATCHPT_CUR: @@ -684,6 +708,14 @@ static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr) exit_to_user_mode(regs); } +static void noinstr el0_gcs(struct pt_regs *regs, unsigned long esr) +{ + enter_from_user_mode(regs); + local_daif_restore(DAIF_PROCCTX); + do_el0_gcs(regs, esr); + exit_to_user_mode(regs); +} + static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { enter_from_user_mode(regs); @@ -766,6 +798,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_MOPS: el0_mops(regs, esr); break; + case ESR_ELx_EC_GCS: + el0_gcs(regs, esr); + break; case ESR_ELx_EC_BREAKPT_LOW: case ESR_ELx_EC_SOFTSTP_LOW: case ESR_ELx_EC_WATCHPT_LOW: diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index f0c16640ef21..169ccf600066 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -329,24 +329,28 @@ SYM_FUNC_END(ftrace_stub_graph) * @fp is checked against the value passed by ftrace_graph_caller(). */ SYM_CODE_START(return_to_handler) - /* save return value regs */ - sub sp, sp, #FGRET_REGS_SIZE - stp x0, x1, [sp, #FGRET_REGS_X0] - stp x2, x3, [sp, #FGRET_REGS_X2] - stp x4, x5, [sp, #FGRET_REGS_X4] - stp x6, x7, [sp, #FGRET_REGS_X6] - str x29, [sp, #FGRET_REGS_FP] // parent's fp + /* Make room for ftrace_regs */ + sub sp, sp, #FREGS_SIZE + + /* Save return value regs */ + stp x0, x1, [sp, #FREGS_X0] + stp x2, x3, [sp, #FREGS_X2] + stp x4, x5, [sp, #FREGS_X4] + stp x6, x7, [sp, #FREGS_X6] + + /* Save the callsite's FP */ + str x29, [sp, #FREGS_FP] mov x0, sp - bl ftrace_return_to_handler // addr = ftrace_return_to_hander(regs); + bl ftrace_return_to_handler // addr = ftrace_return_to_hander(fregs); mov x30, x0 // restore the original return address - /* restore return value regs */ - ldp x0, x1, [sp, #FGRET_REGS_X0] - ldp x2, x3, [sp, #FGRET_REGS_X2] - ldp x4, x5, [sp, #FGRET_REGS_X4] - ldp x6, x7, [sp, #FGRET_REGS_X6] - add sp, sp, #FGRET_REGS_SIZE + /* Restore return value regs */ + ldp x0, x1, [sp, #FREGS_X0] + ldp x2, x3, [sp, #FREGS_X2] + ldp x4, x5, [sp, #FREGS_X4] + ldp x6, x7, [sp, #FREGS_X6] + add sp, sp, #FREGS_SIZE ret SYM_CODE_END(return_to_handler) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 7ef0e127b149..5ae2a34b50bd 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -25,6 +25,7 @@ #include <asm/processor.h> #include <asm/ptrace.h> #include <asm/scs.h> +#include <asm/stacktrace/frame.h> #include <asm/thread_info.h> #include <asm/asm-uaccess.h> #include <asm/unistd.h> @@ -284,15 +285,16 @@ alternative_else_nop_endif stp lr, x21, [sp, #S_LR] /* - * For exceptions from EL0, create a final frame record. - * For exceptions from EL1, create a synthetic frame record so the - * interrupted code shows up in the backtrace. + * Create a metadata frame record. The unwinder will use this to + * identify and unwind exception boundaries. */ - .if \el == 0 stp xzr, xzr, [sp, #S_STACKFRAME] + .if \el == 0 + mov x0, #FRAME_META_TYPE_FINAL .else - stp x29, x22, [sp, #S_STACKFRAME] + mov x0, #FRAME_META_TYPE_PT_REGS .endif + str x0, [sp, #S_STACKFRAME_TYPE] add x29, sp, #S_STACKFRAME #ifdef CONFIG_ARM64_SW_TTBR0_PAN @@ -315,7 +317,7 @@ alternative_if_not ARM64_HAS_GIC_PRIO_MASKING alternative_else_nop_endif mrs_s x20, SYS_ICC_PMR_EL1 - str x20, [sp, #S_PMR_SAVE] + str w20, [sp, #S_PMR] mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET msr_s SYS_ICC_PMR_EL1, x20 @@ -342,7 +344,7 @@ alternative_if_not ARM64_HAS_GIC_PRIO_MASKING b .Lskip_pmr_restore\@ alternative_else_nop_endif - ldr x20, [sp, #S_PMR_SAVE] + ldr w20, [sp, #S_PMR] msr_s SYS_ICC_PMR_EL1, x20 /* Ensure priority change is seen by redistributor */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index ebb0158997ca..2b601d88762d 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -386,7 +386,7 @@ static void task_fpsimd_load(void) * fpsimd_save_user_state() or memory corruption, we * should always record an explicit format * when we save. We always at least have the - * memory allocated for FPSMID registers so + * memory allocated for FPSIMD registers so * try that and hope for the best. */ WARN_ON_ONCE(1); @@ -535,7 +535,7 @@ static unsigned int find_supported_vector_length(enum vec_type type, #if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL) -static int vec_proc_do_default_vl(struct ctl_table *table, int write, +static int vec_proc_do_default_vl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct vl_info *info = table->extra1; @@ -562,7 +562,7 @@ static int vec_proc_do_default_vl(struct ctl_table *table, int write, return 0; } -static struct ctl_table sve_default_vl_table[] = { +static const struct ctl_table sve_default_vl_table[] = { { .procname = "sve_default_vector_length", .mode = 0644, @@ -585,7 +585,7 @@ static int __init sve_sysctl_init(void) { return 0; } #endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ #if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL) -static struct ctl_table sme_default_vl_table[] = { +static const struct ctl_table sme_default_vl_table[] = { { .procname = "sme_default_vector_length", .mode = 0644, @@ -1367,6 +1367,7 @@ static void sve_init_regs(void) } else { fpsimd_to_sve(current); current->thread.fp_type = FP_STATE_SVE; + fpsimd_flush_task_state(current); } } @@ -1535,6 +1536,27 @@ static void fpsimd_save_kernel_state(struct task_struct *task) task->thread.kernel_fpsimd_cpu = smp_processor_id(); } +/* + * Invalidate any task's FPSIMD state that is present on this cpu. + * The FPSIMD context should be acquired with get_cpu_fpsimd_context() + * before calling this function. + */ +static void fpsimd_flush_cpu_state(void) +{ + WARN_ON(!system_supports_fpsimd()); + __this_cpu_write(fpsimd_last_state.st, NULL); + + /* + * Leaving streaming mode enabled will cause issues for any kernel + * NEON and leaving streaming mode or ZA enabled may increase power + * consumption. + */ + if (system_supports_sme()) + sme_smstop(); + + set_thread_flag(TIF_FOREIGN_FPSTATE); +} + void fpsimd_thread_switch(struct task_struct *next) { bool wrong_task, wrong_cpu; @@ -1552,7 +1574,7 @@ void fpsimd_thread_switch(struct task_struct *next) if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) { fpsimd_load_kernel_state(next); - set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE); + fpsimd_flush_cpu_state(); } else { /* * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's @@ -1843,27 +1865,6 @@ void fpsimd_flush_task_state(struct task_struct *t) } /* - * Invalidate any task's FPSIMD state that is present on this cpu. - * The FPSIMD context should be acquired with get_cpu_fpsimd_context() - * before calling this function. - */ -static void fpsimd_flush_cpu_state(void) -{ - WARN_ON(!system_supports_fpsimd()); - __this_cpu_write(fpsimd_last_state.st, NULL); - - /* - * Leaving streaming mode enabled will cause issues for any kernel - * NEON and leaving streaming mode or ZA enabled may increase power - * consumption. - */ - if (system_supports_sme()) - sme_smstop(); - - set_thread_flag(TIF_FOREIGN_FPSTATE); -} - -/* * Save the FPSIMD state to memory and invalidate cpu view. * This function must be called with preemption disabled. */ diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index a650f5e11fc5..d7c0d023dfe5 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -15,7 +15,7 @@ #include <asm/debug-monitors.h> #include <asm/ftrace.h> #include <asm/insn.h> -#include <asm/patching.h> +#include <asm/text-patching.h> #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS struct fregs_offset { @@ -23,10 +23,10 @@ struct fregs_offset { int offset; }; -#define FREGS_OFFSET(n, field) \ -{ \ - .name = n, \ - .offset = offsetof(struct ftrace_regs, field), \ +#define FREGS_OFFSET(n, field) \ +{ \ + .name = n, \ + .offset = offsetof(struct __arch_ftrace_regs, field), \ } static const struct fregs_offset fregs_offsets[] = { @@ -143,6 +143,69 @@ unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +/* Convert fentry_ip to the symbol address without kallsyms */ +unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip) +{ + u32 insn; + + /* + * When using patchable-function-entry without pre-function NOPS, ftrace + * entry is the address of the first NOP after the function entry point. + * + * The compiler has either generated: + * + * func+00: func: NOP // To be patched to MOV X9, LR + * func+04: NOP // To be patched to BL <caller> + * + * Or: + * + * func-04: BTI C + * func+00: func: NOP // To be patched to MOV X9, LR + * func+04: NOP // To be patched to BL <caller> + * + * The fentry_ip is the address of `BL <caller>` which is at `func + 4` + * bytes in either case. + */ + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + return fentry_ip - AARCH64_INSN_SIZE; + + /* + * When using patchable-function-entry with pre-function NOPs, BTI is + * a bit different. + * + * func+00: func: NOP // To be patched to MOV X9, LR + * func+04: NOP // To be patched to BL <caller> + * + * Or: + * + * func+00: func: BTI C + * func+04: NOP // To be patched to MOV X9, LR + * func+08: NOP // To be patched to BL <caller> + * + * The fentry_ip is the address of `BL <caller>` which is at either + * `func + 4` or `func + 8` depends on whether there is a BTI. + */ + + /* If there is no BTI, the func address should be one instruction before. */ + if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + return fentry_ip - AARCH64_INSN_SIZE; + + /* We want to be extra safe in case entry ip is on the page edge, + * but otherwise we need to avoid get_kernel_nofault()'s overhead. + */ + if ((fentry_ip & ~PAGE_MASK) < AARCH64_INSN_SIZE * 2) { + if (get_kernel_nofault(insn, (u32 *)(fentry_ip - AARCH64_INSN_SIZE * 2))) + return 0; + } else { + insn = *(u32 *)(fentry_ip - AARCH64_INSN_SIZE * 2); + } + + if (aarch64_insn_is_bti(le32_to_cpu((__le32)insn))) + return fentry_ip - AARCH64_INSN_SIZE * 2; + + return fentry_ip - AARCH64_INSN_SIZE; +} + /* * Replace a single instruction, which may be a branch or NOP. * If @validate == true, a replaced instruction is checked against 'old'. @@ -481,7 +544,20 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - prepare_ftrace_return(ip, &fregs->lr, fregs->fp); + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long frame_pointer = arch_ftrace_regs(fregs)->fp; + unsigned long *parent = &arch_ftrace_regs(fregs)->lr; + unsigned long old; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + old = *parent; + + if (!function_graph_enter_regs(old, ip, frame_pointer, + (void *)frame_pointer, fregs)) { + *parent = return_hooker; + } } #else /* diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index cb68adcabe07..5ab1970ee543 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -32,6 +32,7 @@ #include <asm/scs.h> #include <asm/smp.h> #include <asm/sysreg.h> +#include <asm/stacktrace/frame.h> #include <asm/thread_info.h> #include <asm/virt.h> @@ -199,6 +200,8 @@ SYM_CODE_END(preserve_boot_args) sub sp, sp, #PT_REGS_SIZE stp xzr, xzr, [sp, #S_STACKFRAME] + mov \tmp1, #FRAME_META_TYPE_FINAL + str \tmp1, [sp, #S_STACKFRAME_TYPE] add x29, sp, #S_STACKFRAME scs_load_current diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 02870beb271e..18749e9a6c2d 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -266,9 +266,15 @@ static int swsusp_mte_save_tags(void) max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) { struct page *page = pfn_to_online_page(pfn); + struct folio *folio; if (!page) continue; + folio = page_folio(page); + + if (folio_test_hugetlb(folio) && + !folio_test_hugetlb_mte_tagged(folio)) + continue; if (!page_mte_tagged(page)) continue; @@ -407,7 +413,7 @@ int swsusp_arch_resume(void) void *, phys_addr_t, phys_addr_t); struct trans_pgd_info trans_info = { .trans_alloc_page = hibernate_page_alloc, - .trans_alloc_arg = (void *)GFP_ATOMIC, + .trans_alloc_arg = (__force void *)GFP_ATOMIC, }; /* diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 2f5755192c2b..722ac45f9f7b 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -655,7 +655,7 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr, perf_bp_event(bp, regs); /* Do we need to handle the stepping? */ - if (uses_default_overflow_handler(bp)) + if (is_default_overflow_handler(bp)) step = 1; unlock: rcu_read_unlock(); @@ -734,7 +734,7 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, static int watchpoint_report(struct perf_event *wp, unsigned long addr, struct pt_regs *regs) { - int step = uses_default_overflow_handler(wp); + int step = is_default_overflow_handler(wp); struct arch_hw_breakpoint *info = counter_arch_bp(wp); info->trigger = addr; diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 65f76064c86b..ae990da1eae5 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -114,8 +114,8 @@ SYM_CODE_START_LOCAL(__finalise_el2) // Use EL2 translations for SPE & TRBE and disable access from EL1 mrs x0, mdcr_el2 - bic x0, x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) - bic x0, x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT) + bic x0, x0, #MDCR_EL2_E2PB_MASK + bic x0, x0, #MDCR_EL2_E2TB_MASK msr mdcr_el2, x0 // Transfer the MM state from EL1 to EL2 diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index ba4f8f7d6a91..ef3a69cc398e 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -105,10 +105,8 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors); KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); -#ifdef CONFIG_ARM64_PSEUDO_NMI -/* Static key checked in GIC_PRIO_IRQOFF. */ -KVM_NVHE_ALIAS(gic_nonsecure_priorities); -#endif +/* Static key which is set if CNTVOFF_EL2 is unusable */ +KVM_NVHE_ALIAS(broken_cntvoff_key); /* EL2 exception handling */ KVM_NVHE_ALIAS(__start___kvm_ex_table); diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c index aa7a4ec6a3ae..fe86ada23c7d 100644 --- a/arch/arm64/kernel/io.c +++ b/arch/arm64/kernel/io.c @@ -10,88 +10,43 @@ #include <linux/io.h> /* - * Copy data from IO memory space to "real" memory space. + * This generates a memcpy that works on a from/to address which is aligned to + * bits. Count is in terms of the number of bits sized quantities to copy. It + * optimizes to use the STR groupings when possible so that it is WC friendly. */ -void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) +#define memcpy_toio_aligned(to, from, count, bits) \ + ({ \ + volatile u##bits __iomem *_to = to; \ + const u##bits *_from = from; \ + size_t _count = count; \ + const u##bits *_end_from = _from + ALIGN_DOWN(_count, 8); \ + \ + for (; _from < _end_from; _from += 8, _to += 8) \ + __const_memcpy_toio_aligned##bits(_to, _from, 8); \ + if ((_count % 8) >= 4) { \ + __const_memcpy_toio_aligned##bits(_to, _from, 4); \ + _from += 4; \ + _to += 4; \ + } \ + if ((_count % 4) >= 2) { \ + __const_memcpy_toio_aligned##bits(_to, _from, 2); \ + _from += 2; \ + _to += 2; \ + } \ + if (_count % 2) \ + __const_memcpy_toio_aligned##bits(_to, _from, 1); \ + }) + +void __iowrite64_copy_full(void __iomem *to, const void *from, size_t count) { - while (count && !IS_ALIGNED((unsigned long)from, 8)) { - *(u8 *)to = __raw_readb(from); - from++; - to++; - count--; - } - - while (count >= 8) { - *(u64 *)to = __raw_readq(from); - from += 8; - to += 8; - count -= 8; - } - - while (count) { - *(u8 *)to = __raw_readb(from); - from++; - to++; - count--; - } + memcpy_toio_aligned(to, from, count, 64); + dgh(); } -EXPORT_SYMBOL(__memcpy_fromio); +EXPORT_SYMBOL(__iowrite64_copy_full); -/* - * Copy data from "real" memory space to IO memory space. - */ -void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count) +void __iowrite32_copy_full(void __iomem *to, const void *from, size_t count) { - while (count && !IS_ALIGNED((unsigned long)to, 8)) { - __raw_writeb(*(u8 *)from, to); - from++; - to++; - count--; - } - - while (count >= 8) { - __raw_writeq(*(u64 *)from, to); - from += 8; - to += 8; - count -= 8; - } - - while (count) { - __raw_writeb(*(u8 *)from, to); - from++; - to++; - count--; - } -} -EXPORT_SYMBOL(__memcpy_toio); - -/* - * "memset" on IO memory space. - */ -void __memset_io(volatile void __iomem *dst, int c, size_t count) -{ - u64 qc = (u8)c; - - qc |= qc << 8; - qc |= qc << 16; - qc |= qc << 32; - - while (count && !IS_ALIGNED((unsigned long)dst, 8)) { - __raw_writeb(c, dst); - dst++; - count--; - } - - while (count >= 8) { - __raw_writeq(qc, dst); - dst += 8; - count -= 8; - } - - while (count) { - __raw_writeb(c, dst); - dst++; - count--; - } + memcpy_toio_aligned(to, from, count, 32); + dgh(); } -EXPORT_SYMBOL(__memset_io); +EXPORT_SYMBOL(__iowrite32_copy_full); diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index faf88ec9c48e..b345425193d2 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -7,11 +7,12 @@ */ #include <linux/kernel.h> #include <linux/jump_label.h> +#include <linux/smp.h> #include <asm/insn.h> -#include <asm/patching.h> +#include <asm/text-patching.h> -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) +bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type) { void *addr = (void *)jump_entry_code(entry); u32 insn; @@ -25,4 +26,10 @@ void arch_jump_label_transform(struct jump_entry *entry, } aarch64_insn_patch_text_nosync(addr, insn); + return true; +} + +void arch_jump_label_transform_apply(void) +{ + kick_all_cpus_sync(); } diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 4e1f983df3d1..f3c4d3a8a20f 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -17,7 +17,7 @@ #include <asm/debug-monitors.h> #include <asm/insn.h> -#include <asm/patching.h> +#include <asm/text-patching.h> #include <asm/traps.h> struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 82e2203d86a3..6f121a0164a4 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -207,37 +207,6 @@ void machine_kexec(struct kimage *kimage) BUG(); /* Should never get here. */ } -static void machine_kexec_mask_interrupts(void) -{ - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - int ret; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - /* - * First try to remove the active state. If this - * fails, try to EOI the interrupt. - */ - ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); - - if (ret && irqd_irq_inprogress(&desc->irq_data) && - chip->irq_eoi) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - /** * machine_crash_shutdown - shutdown non-crashing cpus and save registers */ diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 47e0be610bb6..06bb680bfe97 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -12,144 +12,18 @@ #include <linux/bitops.h> #include <linux/elf.h> #include <linux/ftrace.h> -#include <linux/gfp.h> #include <linux/kasan.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/moduleloader.h> #include <linux/random.h> #include <linux/scs.h> -#include <linux/vmalloc.h> #include <asm/alternative.h> #include <asm/insn.h> #include <asm/scs.h> #include <asm/sections.h> -static u64 module_direct_base __ro_after_init = 0; -static u64 module_plt_base __ro_after_init = 0; - -/* - * Choose a random page-aligned base address for a window of 'size' bytes which - * entirely contains the interval [start, end - 1]. - */ -static u64 __init random_bounding_box(u64 size, u64 start, u64 end) -{ - u64 max_pgoff, pgoff; - - if ((end - start) >= size) - return 0; - - max_pgoff = (size - (end - start)) / PAGE_SIZE; - pgoff = get_random_u32_inclusive(0, max_pgoff); - - return start - pgoff * PAGE_SIZE; -} - -/* - * Modules may directly reference data and text anywhere within the kernel - * image and other modules. References using PREL32 relocations have a +/-2G - * range, and so we need to ensure that the entire kernel image and all modules - * fall within a 2G window such that these are always within range. - * - * Modules may directly branch to functions and code within the kernel text, - * and to functions and code within other modules. These branches will use - * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure - * that the entire kernel text and all module text falls within a 128M window - * such that these are always within range. With PLTs, we can expand this to a - * 2G window. - * - * We chose the 128M region to surround the entire kernel image (rather than - * just the text) as using the same bounds for the 128M and 2G regions ensures - * by construction that we never select a 128M region that is not a subset of - * the 2G region. For very large and unusual kernel configurations this means - * we may fall back to PLTs where they could have been avoided, but this keeps - * the logic significantly simpler. - */ -static int __init module_init_limits(void) -{ - u64 kernel_end = (u64)_end; - u64 kernel_start = (u64)_text; - u64 kernel_size = kernel_end - kernel_start; - - /* - * The default modules region is placed immediately below the kernel - * image, and is large enough to use the full 2G relocation range. - */ - BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END); - BUILD_BUG_ON(MODULES_VSIZE < SZ_2G); - - if (!kaslr_enabled()) { - if (kernel_size < SZ_128M) - module_direct_base = kernel_end - SZ_128M; - if (kernel_size < SZ_2G) - module_plt_base = kernel_end - SZ_2G; - } else { - u64 min = kernel_start; - u64 max = kernel_end; - - if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { - pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n"); - } else { - module_direct_base = random_bounding_box(SZ_128M, min, max); - if (module_direct_base) { - min = module_direct_base; - max = module_direct_base + SZ_128M; - } - } - - module_plt_base = random_bounding_box(SZ_2G, min, max); - } - - pr_info("%llu pages in range for non-PLT usage", - module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0); - pr_info("%llu pages in range for PLT usage", - module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0); - - return 0; -} -subsys_initcall(module_init_limits); - -void *module_alloc(unsigned long size) -{ - void *p = NULL; - - /* - * Where possible, prefer to allocate within direct branch range of the - * kernel such that no PLTs are necessary. - */ - if (module_direct_base) { - p = __vmalloc_node_range(size, MODULE_ALIGN, - module_direct_base, - module_direct_base + SZ_128M, - GFP_KERNEL | __GFP_NOWARN, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); - } - - if (!p && module_plt_base) { - p = __vmalloc_node_range(size, MODULE_ALIGN, - module_plt_base, - module_plt_base + SZ_2G, - GFP_KERNEL | __GFP_NOWARN, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); - } - - if (!p) { - pr_warn_ratelimited("%s: unable to allocate memory\n", - __func__); - } - - if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) { - vfree(p); - return NULL; - } - - /* Memory is intended to be executable, reset the pointer tag. */ - return kasan_reset_tag(p); -} - enum aarch64_reloc_op { RELOC_OP_NONE, RELOC_OP_ABS, @@ -588,14 +462,20 @@ int module_finalize(const Elf_Ehdr *hdr, struct module *me) { const Elf_Shdr *s; + int ret; + s = find_section(hdr, sechdrs, ".altinstructions"); if (s) apply_alternatives_module((void *)s->sh_addr, s->sh_size); if (scs_is_dynamic()) { s = find_section(hdr, sechdrs, ".init.eh_frame"); - if (s) - __pi_scs_patch((void *)s->sh_addr, s->sh_size); + if (s) { + ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size); + if (ret) + pr_err("module %s: error occurred during dynamic SCS patching (%d)\n", + me->name, ret); + } } return module_init_ftrace_plt(hdr, sechdrs, me); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index dcdcccd40891..2fbfd27ff5f2 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -38,7 +38,24 @@ EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode); void mte_sync_tags(pte_t pte, unsigned int nr_pages) { struct page *page = pte_page(pte); - unsigned int i; + struct folio *folio = page_folio(page); + unsigned long i; + + if (folio_test_hugetlb(folio)) { + unsigned long nr = folio_nr_pages(folio); + + /* Hugetlb MTE flags are set for head page only */ + if (folio_try_hugetlb_mte_tagging(folio)) { + for (i = 0; i < nr; i++, page++) + mte_clear_page_tags(page_address(page)); + folio_set_hugetlb_mte_tagged(folio); + } + + /* ensure the tags are visible before the PTE is set */ + smp_wmb(); + + return; + } /* if PG_mte_tagged is set, tags have already been initialised */ for (i = 0; i < nr_pages; i++, page++) { @@ -410,6 +427,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, void *maddr; struct page *page = get_user_page_vma_remote(mm, addr, gup_flags, &vma); + struct folio *folio; if (IS_ERR(page)) { err = PTR_ERR(page); @@ -428,7 +446,12 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, put_page(page); break; } - WARN_ON_ONCE(!page_mte_tagged(page)); + + folio = page_folio(page); + if (folio_test_hugetlb(folio)) + WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio)); + else + WARN_ON_ONCE(!page_mte_tagged(page)); /* limit access to the end of the page */ offset = offset_in_page(addr); @@ -582,12 +605,9 @@ subsys_initcall(register_mte_tcf_preferred_sysctl); size_t mte_probe_user_range(const char __user *uaddr, size_t size) { const char __user *end = uaddr + size; - int err = 0; char val; - __raw_get_user(val, uaddr, err); - if (err) - return size; + __raw_get_user(val, uaddr, efault); uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE); while (uaddr < end) { @@ -595,12 +615,13 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size) * A read is sufficient for mte, the caller should have probed * for the pte write permission if required. */ - __raw_get_user(val, uaddr, err); - if (err) - return end - uaddr; + __raw_get_user(val, uaddr, efault); uaddr += MTE_GRANULE_SIZE; } (void)val; return 0; + +efault: + return end - uaddr; } diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c index 255534930368..1041bc67a3ee 100644 --- a/arch/arm64/kernel/patching.c +++ b/arch/arm64/kernel/patching.c @@ -10,7 +10,7 @@ #include <asm/fixmap.h> #include <asm/insn.h> #include <asm/kprobes.h> -#include <asm/patching.h> +#include <asm/text-patching.h> #include <asm/sections.h> static DEFINE_RAW_SPINLOCK(patch_lock); @@ -30,20 +30,17 @@ static bool is_image_text(unsigned long addr) static void __kprobes *patch_map(void *addr, int fixmap) { - unsigned long uintaddr = (uintptr_t) addr; - bool image = is_image_text(uintaddr); - struct page *page; + phys_addr_t phys; - if (image) - page = phys_to_page(__pa_symbol(addr)); - else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) - page = vmalloc_to_page(addr); - else - return addr; + if (is_image_text((unsigned long)addr)) { + phys = __pa_symbol(addr); + } else { + struct page *page = vmalloc_to_page(addr); + BUG_ON(!page); + phys = page_to_phys(page) + offset_in_page(addr); + } - BUG_ON(!page); - return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + - (uintaddr & ~PAGE_MASK)); + return (void *)set_fixmap_offset(fixmap, phys); } static void __kprobes patch_unmap(int fixmap) diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index f872c57e9909..fd9a7bed83ce 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -6,28 +6,7 @@ * Copyright (C) 2014 ARM Ltd. */ -#include <linux/acpi.h> -#include <linux/init.h> -#include <linux/io.h> -#include <linux/kernel.h> -#include <linux/mm.h> #include <linux/pci.h> -#include <linux/pci-acpi.h> -#include <linux/pci-ecam.h> -#include <linux/slab.h> - -#ifdef CONFIG_ACPI -/* - * Try to assign the IRQ number when probing a new device - */ -int pcibios_alloc_irq(struct pci_dev *dev) -{ - if (!acpi_disabled) - acpi_pci_irq_enable(dev); - - return 0; -} -#endif /* * raw_pci_read/write - Platform-specific PCI config space access. @@ -61,173 +40,3 @@ int pcibus_to_node(struct pci_bus *bus) EXPORT_SYMBOL(pcibus_to_node); #endif - -#ifdef CONFIG_ACPI - -struct acpi_pci_generic_root_info { - struct acpi_pci_root_info common; - struct pci_config_window *cfg; /* config space mapping */ -}; - -int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) -{ - struct pci_config_window *cfg = bus->sysdata; - struct acpi_device *adev = to_acpi_device(cfg->parent); - struct acpi_pci_root *root = acpi_driver_data(adev); - - return root->segment; -} - -int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) -{ - struct pci_config_window *cfg; - struct acpi_device *adev; - struct device *bus_dev; - - if (acpi_disabled) - return 0; - - cfg = bridge->bus->sysdata; - - /* - * On Hyper-V there is no corresponding ACPI device for a root bridge, - * therefore ->parent is set as NULL by the driver. And set 'adev' as - * NULL in this case because there is no proper ACPI device. - */ - if (!cfg->parent) - adev = NULL; - else - adev = to_acpi_device(cfg->parent); - - bus_dev = &bridge->bus->dev; - - ACPI_COMPANION_SET(&bridge->dev, adev); - set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); - - return 0; -} - -static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci) -{ - struct resource_entry *entry, *tmp; - int status; - - status = acpi_pci_probe_root_resources(ci); - resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { - if (!(entry->res->flags & IORESOURCE_WINDOW)) - resource_list_destroy_entry(entry); - } - return status; -} - -/* - * Lookup the bus range for the domain in MCFG, and set up config space - * mapping. - */ -static struct pci_config_window * -pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root) -{ - struct device *dev = &root->device->dev; - struct resource *bus_res = &root->secondary; - u16 seg = root->segment; - const struct pci_ecam_ops *ecam_ops; - struct resource cfgres; - struct acpi_device *adev; - struct pci_config_window *cfg; - int ret; - - ret = pci_mcfg_lookup(root, &cfgres, &ecam_ops); - if (ret) { - dev_err(dev, "%04x:%pR ECAM region not found\n", seg, bus_res); - return NULL; - } - - adev = acpi_resource_consumer(&cfgres); - if (adev) - dev_info(dev, "ECAM area %pR reserved by %s\n", &cfgres, - dev_name(&adev->dev)); - else - dev_warn(dev, FW_BUG "ECAM area %pR not reserved in ACPI namespace\n", - &cfgres); - - cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); - if (IS_ERR(cfg)) { - dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res, - PTR_ERR(cfg)); - return NULL; - } - - return cfg; -} - -/* release_info: free resources allocated by init_info */ -static void pci_acpi_generic_release_info(struct acpi_pci_root_info *ci) -{ - struct acpi_pci_generic_root_info *ri; - - ri = container_of(ci, struct acpi_pci_generic_root_info, common); - pci_ecam_free(ri->cfg); - kfree(ci->ops); - kfree(ri); -} - -/* Interface called from ACPI code to setup PCI host controller */ -struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) -{ - struct acpi_pci_generic_root_info *ri; - struct pci_bus *bus, *child; - struct acpi_pci_root_ops *root_ops; - struct pci_host_bridge *host; - - ri = kzalloc(sizeof(*ri), GFP_KERNEL); - if (!ri) - return NULL; - - root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL); - if (!root_ops) { - kfree(ri); - return NULL; - } - - ri->cfg = pci_acpi_setup_ecam_mapping(root); - if (!ri->cfg) { - kfree(ri); - kfree(root_ops); - return NULL; - } - - root_ops->release_info = pci_acpi_generic_release_info; - root_ops->prepare_resources = pci_acpi_root_prepare_resources; - root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops; - bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg); - if (!bus) - return NULL; - - /* If we must preserve the resource configuration, claim now */ - host = pci_find_host_bridge(bus); - if (host->preserve_config) - pci_bus_claim_resources(bus); - - /* - * Assign whatever was left unassigned. If we didn't claim above, - * this will reassign everything. - */ - pci_assign_unassigned_root_bus_resources(bus); - - list_for_each_entry(child, &bus->children, node) - pcie_bus_configure_settings(child); - - return bus; -} - -void pcibios_add_bus(struct pci_bus *bus) -{ - acpi_pci_add_bus(bus); -} - -void pcibios_remove_bus(struct pci_bus *bus) -{ - acpi_pci_remove_bus(bus); -} - -#endif diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 6d157f32187b..9b7f26b128b5 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -10,94 +10,12 @@ #include <asm/pointer_auth.h> -struct frame_tail { - struct frame_tail __user *fp; - unsigned long lr; -} __attribute__((packed)); - -/* - * Get the return address for a single stackframe and return a pointer to the - * next frame tail. - */ -static struct frame_tail __user * -user_backtrace(struct frame_tail __user *tail, - struct perf_callchain_entry_ctx *entry) -{ - struct frame_tail buftail; - unsigned long err; - unsigned long lr; - - /* Also check accessibility of one struct frame_tail beyond */ - if (!access_ok(tail, sizeof(buftail))) - return NULL; - - pagefault_disable(); - err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); - pagefault_enable(); - - if (err) - return NULL; - - lr = ptrauth_strip_user_insn_pac(buftail.lr); - - perf_callchain_store(entry, lr); - - /* - * Frame pointers should strictly progress back up the stack - * (towards higher addresses). - */ - if (tail >= buftail.fp) - return NULL; - - return buftail.fp; -} - -#ifdef CONFIG_COMPAT -/* - * The registers we're interested in are at the end of the variable - * length saved register structure. The fp points at the end of this - * structure so the address of this struct is: - * (struct compat_frame_tail *)(xxx->fp)-1 - * - * This code has been adapted from the ARM OProfile support. - */ -struct compat_frame_tail { - compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ - u32 sp; - u32 lr; -} __attribute__((packed)); - -static struct compat_frame_tail __user * -compat_user_backtrace(struct compat_frame_tail __user *tail, - struct perf_callchain_entry_ctx *entry) +static bool callchain_trace(void *data, unsigned long pc) { - struct compat_frame_tail buftail; - unsigned long err; - - /* Also check accessibility of one struct frame_tail beyond */ - if (!access_ok(tail, sizeof(buftail))) - return NULL; - - pagefault_disable(); - err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); - pagefault_enable(); - - if (err) - return NULL; - - perf_callchain_store(entry, buftail.lr); - - /* - * Frame pointers should strictly progress back up the stack - * (towards higher addresses). - */ - if (tail + 1 >= (struct compat_frame_tail __user *) - compat_ptr(buftail.fp)) - return NULL; + struct perf_callchain_entry_ctx *entry = data; - return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; + return perf_callchain_store(entry, pc) == 0; } -#endif /* CONFIG_COMPAT */ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) @@ -107,35 +25,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, return; } - perf_callchain_store(entry, regs->pc); - - if (!compat_user_mode(regs)) { - /* AARCH64 mode */ - struct frame_tail __user *tail; - - tail = (struct frame_tail __user *)regs->regs[29]; - - while (entry->nr < entry->max_stack && - tail && !((unsigned long)tail & 0x7)) - tail = user_backtrace(tail, entry); - } else { -#ifdef CONFIG_COMPAT - /* AARCH32 compat mode */ - struct compat_frame_tail __user *tail; - - tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; - - while ((entry->nr < entry->max_stack) && - tail && !((unsigned long)tail & 0x3)) - tail = compat_user_backtrace(tail, entry); -#endif - } -} - -static bool callchain_trace(void *data, unsigned long pc) -{ - struct perf_callchain_entry_ctx *entry = data; - return perf_callchain_store(entry, pc) == 0; + arch_stack_walk_user(callchain_trace, entry, regs); } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, @@ -148,31 +38,3 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, arch_stack_walk(callchain_trace, entry, current, regs); } - -unsigned long perf_instruction_pointer(struct pt_regs *regs) -{ - if (perf_guest_state()) - return perf_guest_get_ip(); - - return instruction_pointer(regs); -} - -unsigned long perf_misc_flags(struct pt_regs *regs) -{ - unsigned int guest_state = perf_guest_state(); - int misc = 0; - - if (guest_state) { - if (guest_state & PERF_GUEST_USER) - misc |= PERF_RECORD_MISC_GUEST_USER; - else - misc |= PERF_RECORD_MISC_GUEST_KERNEL; - } else { - if (user_mode(regs)) - misc |= PERF_RECORD_MISC_USER; - else - misc |= PERF_RECORD_MISC_KERNEL; - } - - return misc; -} diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile index 4393b41f0b71..4d11a8c29181 100644 --- a/arch/arm64/kernel/pi/Makefile +++ b/arch/arm64/kernel/pi/Makefile @@ -19,12 +19,6 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) # disable LTO KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) -GCOV_PROFILE := n -KASAN_SANITIZE := n -KCSAN_SANITIZE := n -UBSAN_SANITIZE := n -KCOV_INSTRUMENT := n - hostprogs := relacheck quiet_cmd_piobjcopy = $(quiet_cmd_objcopy) diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c index aad399796e81..c6b185b885f7 100644 --- a/arch/arm64/kernel/pi/idreg-override.c +++ b/arch/arm64/kernel/pi/idreg-override.c @@ -38,6 +38,15 @@ struct ftr_set_desc { #define FIELD(n, s, f) { .name = n, .shift = s, .width = 4, .filter = f } +static const struct ftr_set_desc mmfr0 __prel64_initconst = { + .name = "id_aa64mmfr0", + .override = &id_aa64mmfr0_override, + .fields = { + FIELD("ecv", ID_AA64MMFR0_EL1_ECV_SHIFT, NULL), + {} + }, +}; + static bool __init mmfr1_vh_filter(u64 val) { /* @@ -74,6 +83,15 @@ static bool __init mmfr2_varange_filter(u64 val) id_aa64mmfr0_override.val |= (ID_AA64MMFR0_EL1_TGRAN_LPA2 - 1) << ID_AA64MMFR0_EL1_TGRAN_SHIFT; id_aa64mmfr0_override.mask |= 0xfU << ID_AA64MMFR0_EL1_TGRAN_SHIFT; + + /* + * Override PARange to 48 bits - the override will just be + * ignored if the actual PARange is smaller, but this is + * unlikely to be the case for LPA2 capable silicon. + */ + id_aa64mmfr0_override.val |= + ID_AA64MMFR0_EL1_PARANGE_48 << ID_AA64MMFR0_EL1_PARANGE_SHIFT; + id_aa64mmfr0_override.mask |= 0xfU << ID_AA64MMFR0_EL1_PARANGE_SHIFT; } #endif return true; @@ -108,6 +126,7 @@ static const struct ftr_set_desc pfr0 __prel64_initconst = { .override = &id_aa64pfr0_override, .fields = { FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter), + FIELD("el0", ID_AA64PFR0_EL1_EL0_SHIFT, NULL), {} }, }; @@ -132,6 +151,7 @@ static const struct ftr_set_desc pfr1 __prel64_initconst = { .override = &id_aa64pfr1_override, .fields = { FIELD("bt", ID_AA64PFR1_EL1_BT_SHIFT, NULL ), + FIELD("gcs", ID_AA64PFR1_EL1_GCS_SHIFT, NULL), FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL), FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter), {} @@ -195,6 +215,7 @@ static const struct ftr_set_desc sw_features __prel64_initconst = { static const PREL64(const struct ftr_set_desc, reg) regs[] __prel64_initconst = { + { &mmfr0 }, { &mmfr1 }, { &mmfr2 }, { &pfr0 }, @@ -209,11 +230,12 @@ static const struct { char alias[FTR_ALIAS_NAME_LEN]; char feature[FTR_ALIAS_OPTION_LEN]; } aliases[] __initconst = { - { "kvm_arm.mode=nvhe", "id_aa64mmfr1.vh=0" }, - { "kvm_arm.mode=protected", "id_aa64mmfr1.vh=0" }, + { "kvm_arm.mode=nvhe", "arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" }, + { "kvm_arm.mode=protected", "arm64_sw.hvhe=1" }, { "arm64.nosve", "id_aa64pfr0.sve=0" }, { "arm64.nosme", "id_aa64pfr1.sme=0" }, { "arm64.nobti", "id_aa64pfr1.bt=0" }, + { "arm64.nogcs", "id_aa64pfr1.gcs=0" }, { "arm64.nopauth", "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " "id_aa64isar1.api=0 id_aa64isar1.apa=0 " @@ -223,6 +245,7 @@ static const struct { { "nokaslr", "arm64_sw.nokaslr=1" }, { "rodata=off", "arm64_sw.rodataoff=1" }, { "arm64.nolva", "id_aa64mmfr2.varange=0" }, + { "arm64.no32bit_el0", "id_aa64pfr0.el0=1" }, }; static int __init parse_hexdigit(const char *p, u64 *v) diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index 5fa08e13e17e..e57b043f324b 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -136,6 +136,12 @@ static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr) { u64 sctlr = read_sysreg(sctlr_el1); u64 tcr = read_sysreg(tcr_el1) | TCR_DS; + u64 mmfr0 = read_sysreg(id_aa64mmfr0_el1); + u64 parange = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_EL1_PARANGE_SHIFT); + + tcr &= ~TCR_IPS_MASK; + tcr |= parange << TCR_IPS_SHIFT; asm(" msr sctlr_el1, %0 ;" " isb ;" @@ -173,7 +179,7 @@ static void __init remap_idmap_for_lpa2(void) * Don't bother with the FDT, we no longer need it after this. */ memset(init_idmap_pg_dir, 0, - (u64)init_idmap_pg_dir - (u64)init_idmap_pg_end); + (u64)init_idmap_pg_end - (u64)init_idmap_pg_dir); create_init_idmap(init_idmap_pg_dir, mask); dsb(ishst); diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c index 5410b2cac590..2b69e3beeef8 100644 --- a/arch/arm64/kernel/pi/map_range.c +++ b/arch/arm64/kernel/pi/map_range.c @@ -30,7 +30,7 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot, int level, pte_t *tbl, bool may_use_cont, u64 va_offset) { u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX; - u64 protval = pgprot_val(prot) & ~PTE_TYPE_MASK; + pteval_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK; int lshift = (3 - level) * (PAGE_SHIFT - 3); u64 lmask = (PAGE_SIZE << lshift) - 1; diff --git a/arch/arm64/kernel/pi/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c index 49d8b40e61bc..55d0cd64ef71 100644 --- a/arch/arm64/kernel/pi/patch-scs.c +++ b/arch/arm64/kernel/pi/patch-scs.c @@ -50,6 +50,10 @@ bool dynamic_scs_is_enabled; #define DW_CFA_GNU_negative_offset_extended 0x2f #define DW_CFA_hi_user 0x3f +#define DW_EH_PE_sdata4 0x0b +#define DW_EH_PE_sdata8 0x0c +#define DW_EH_PE_pcrel 0x10 + enum { PACIASP = 0xd503233f, AUTIASP = 0xd50323bf, @@ -120,7 +124,12 @@ struct eh_frame { union { struct { // CIE u8 version; - u8 augmentation_string[]; + u8 augmentation_string[3]; + u8 code_alignment_factor; + u8 data_alignment_factor; + u8 return_address_register; + u8 augmentation_data_size; + u8 fde_pointer_format; }; struct { // FDE @@ -128,29 +137,38 @@ struct eh_frame { s32 range; u8 opcodes[]; }; + + struct { // FDE + s64 initial_loc64; + s64 range64; + u8 opcodes64[]; + }; }; }; static int scs_handle_fde_frame(const struct eh_frame *frame, - bool fde_has_augmentation_data, int code_alignment_factor, + bool use_sdata8, bool dry_run) { int size = frame->size - offsetof(struct eh_frame, opcodes) + 4; u64 loc = (u64)offset_to_ptr(&frame->initial_loc); const u8 *opcode = frame->opcodes; + int l; - if (fde_has_augmentation_data) { - int l; + if (use_sdata8) { + loc = (u64)&frame->initial_loc64 + frame->initial_loc64; + opcode = frame->opcodes64; + size -= 8; + } - // assume single byte uleb128_t - if (WARN_ON(*opcode & BIT(7))) - return -ENOEXEC; + // assume single byte uleb128_t for augmentation data size + if (*opcode & BIT(7)) + return EDYNSCS_INVALID_FDE_AUGM_DATA_SIZE; - l = *opcode++; - opcode += l; - size -= l + 1; - } + l = *opcode++; + opcode += l; + size -= l + 1; /* * Starting from 'loc', apply the CFA opcodes that advance the location @@ -201,7 +219,7 @@ static int scs_handle_fde_frame(const struct eh_frame *frame, break; default: - return -ENOEXEC; + return EDYNSCS_INVALID_CFA_OPCODE; } } return 0; @@ -209,12 +227,12 @@ static int scs_handle_fde_frame(const struct eh_frame *frame, int scs_patch(const u8 eh_frame[], int size) { + int code_alignment_factor = 1; + bool fde_use_sdata8 = false; const u8 *p = eh_frame; while (size > 4) { const struct eh_frame *frame = (const void *)p; - bool fde_has_augmentation_data = true; - int code_alignment_factor = 1; int ret; if (frame->size == 0 || @@ -223,28 +241,47 @@ int scs_patch(const u8 eh_frame[], int size) break; if (frame->cie_id_or_pointer == 0) { - const u8 *p = frame->augmentation_string; - - /* a 'z' in the augmentation string must come first */ - fde_has_augmentation_data = *p == 'z'; + /* + * Require presence of augmentation data (z) with a + * specifier for the size of the FDE initial_loc and + * range fields (R), and nothing else. + */ + if (strcmp(frame->augmentation_string, "zR")) + return EDYNSCS_INVALID_CIE_HEADER; /* * The code alignment factor is a uleb128 encoded field * but given that the only sensible values are 1 or 4, - * there is no point in decoding the whole thing. + * there is no point in decoding the whole thing. Also + * sanity check the size of the data alignment factor + * field, and the values of the return address register + * and augmentation data size fields. */ - p += strlen(p) + 1; - if (!WARN_ON(*p & BIT(7))) - code_alignment_factor = *p; + if ((frame->code_alignment_factor & BIT(7)) || + (frame->data_alignment_factor & BIT(7)) || + frame->return_address_register != 30 || + frame->augmentation_data_size != 1) + return EDYNSCS_INVALID_CIE_HEADER; + + code_alignment_factor = frame->code_alignment_factor; + + switch (frame->fde_pointer_format) { + case DW_EH_PE_pcrel | DW_EH_PE_sdata4: + fde_use_sdata8 = false; + break; + case DW_EH_PE_pcrel | DW_EH_PE_sdata8: + fde_use_sdata8 = true; + break; + default: + return EDYNSCS_INVALID_CIE_SDATA_SIZE; + } } else { - ret = scs_handle_fde_frame(frame, - fde_has_augmentation_data, - code_alignment_factor, - true); + ret = scs_handle_fde_frame(frame, code_alignment_factor, + fde_use_sdata8, true); if (ret) return ret; - scs_handle_fde_frame(frame, fde_has_augmentation_data, - code_alignment_factor, false); + scs_handle_fde_frame(frame, code_alignment_factor, + fde_use_sdata8, false); } p += sizeof(frame->size) + frame->size; diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index 968d5fffe233..6438bf62e753 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -58,10 +58,13 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn) * Instructions which load PC relative literals are not going to work * when executed from an XOL slot. Instructions doing an exclusive * load/store are not going to complete successfully when single-step - * exception handling happens in the middle of the sequence. + * exception handling happens in the middle of the sequence. Memory + * copy/set instructions require that all three instructions be placed + * consecutively in memory. */ if (aarch64_insn_uses_literal(insn) || - aarch64_insn_is_exclusive(insn)) + aarch64_insn_is_exclusive(insn) || + aarch64_insn_is_mops(insn)) return false; return true; @@ -73,9 +76,18 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn) * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot. */ enum probe_insn __kprobes -arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api) +arm_probe_decode_insn(u32 insn, struct arch_probe_insn *api) { /* + * While 'nop' instruction can execute in the out-of-line slot, + * simulating them in breakpoint handling offers better performance. + */ + if (aarch64_insn_is_nop(insn)) { + api->handler = simulate_nop; + return INSN_GOOD_NO_SLOT; + } + + /* * Instructions reading or modifying the PC won't work from the XOL * slot. */ @@ -99,10 +111,6 @@ arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api) aarch64_insn_is_blr(insn) || aarch64_insn_is_ret(insn)) { api->handler = simulate_br_blr_ret; - } else if (aarch64_insn_is_ldr_lit(insn)) { - api->handler = simulate_ldr_literal; - } else if (aarch64_insn_is_ldrsw_lit(insn)) { - api->handler = simulate_ldrsw_literal; } else { /* * Instruction cannot be stepped out-of-line and we don't @@ -137,9 +145,20 @@ enum probe_insn __kprobes arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) { enum probe_insn decoded; - probe_opcode_t insn = le32_to_cpu(*addr); - probe_opcode_t *scan_end = NULL; + u32 insn = le32_to_cpu(*addr); + kprobe_opcode_t *scan_end = NULL; unsigned long size = 0, offset = 0; + struct arch_probe_insn *api = &asi->api; + + if (aarch64_insn_is_ldr_lit(insn)) { + api->handler = simulate_ldr_literal; + decoded = INSN_GOOD_NO_SLOT; + } else if (aarch64_insn_is_ldrsw_lit(insn)) { + api->handler = simulate_ldrsw_literal; + decoded = INSN_GOOD_NO_SLOT; + } else { + decoded = arm_probe_decode_insn(insn, &asi->api); + } /* * If there's a symbol defined in front of and near enough to @@ -157,7 +176,6 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) else scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; } - decoded = arm_probe_decode_insn(insn, &asi->api); if (decoded != INSN_REJECTED && scan_end) if (is_probed_address_atomic(addr - 1, scan_end)) diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h index 8b758c5a2062..0e4195de8206 100644 --- a/arch/arm64/kernel/probes/decode-insn.h +++ b/arch/arm64/kernel/probes/decode-insn.h @@ -28,6 +28,6 @@ enum probe_insn __kprobes arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi); #endif enum probe_insn __kprobes -arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *asi); +arm_probe_decode_insn(u32 insn, struct arch_probe_insn *asi); #endif /* _ARM_KERNEL_KPROBES_ARM64_H */ diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 327855a11df2..d9e462eafb95 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -27,7 +27,7 @@ #include <asm/debug-monitors.h> #include <asm/insn.h> #include <asm/irq.h> -#include <asm/patching.h> +#include <asm/text-patching.h> #include <asm/ptrace.h> #include <asm/sections.h> #include <asm/system_misc.h> @@ -43,7 +43,7 @@ post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *); static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { - kprobe_opcode_t *addr = p->ainsn.api.insn; + kprobe_opcode_t *addr = p->ainsn.xol_insn; /* * Prepare insn slot, Mark Rutland points out it depends on a coupe of @@ -64,20 +64,20 @@ static void __kprobes arch_prepare_ss_slot(struct kprobe *p) * the BRK exception handler, so it is unnecessary to generate * Contex-Synchronization-Event via ISB again. */ - aarch64_insn_patch_text_nosync(addr, p->opcode); + aarch64_insn_patch_text_nosync(addr, le32_to_cpu(p->opcode)); aarch64_insn_patch_text_nosync(addr + 1, BRK64_OPCODE_KPROBES_SS); /* * Needs restoring of return address after stepping xol. */ - p->ainsn.api.restore = (unsigned long) p->addr + + p->ainsn.xol_restore = (unsigned long) p->addr + sizeof(kprobe_opcode_t); } static void __kprobes arch_prepare_simulate(struct kprobe *p) { /* This instructions is not executed xol. No need to adjust the PC */ - p->ainsn.api.restore = 0; + p->ainsn.xol_restore = 0; } static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) @@ -85,7 +85,7 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); if (p->ainsn.api.handler) - p->ainsn.api.handler((u32)p->opcode, (long)p->addr, regs); + p->ainsn.api.handler(le32_to_cpu(p->opcode), (long)p->addr, regs); /* single step simulated, now go for post processing */ post_kprobe_handler(p, kcb, regs); @@ -99,7 +99,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return -EINVAL; /* copy instruction */ - p->opcode = le32_to_cpu(*p->addr); + p->opcode = *p->addr; if (search_exception_tables(probe_addr)) return -EINVAL; @@ -110,18 +110,18 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return -EINVAL; case INSN_GOOD_NO_SLOT: /* insn need simulation */ - p->ainsn.api.insn = NULL; + p->ainsn.xol_insn = NULL; break; case INSN_GOOD: /* instruction uses slot */ - p->ainsn.api.insn = get_insn_slot(); - if (!p->ainsn.api.insn) + p->ainsn.xol_insn = get_insn_slot(); + if (!p->ainsn.xol_insn) return -ENOMEM; break; } /* prepare the instruction */ - if (p->ainsn.api.insn) + if (p->ainsn.xol_insn) arch_prepare_ss_slot(p); else arch_prepare_simulate(p); @@ -129,13 +129,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } -void *alloc_insn_page(void) -{ - return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); -} - /* arm kprobe: install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { @@ -149,15 +142,16 @@ void __kprobes arch_arm_kprobe(struct kprobe *p) void __kprobes arch_disarm_kprobe(struct kprobe *p) { void *addr = p->addr; + u32 insn = le32_to_cpu(p->opcode); - aarch64_insn_patch_text(&addr, &p->opcode, 1); + aarch64_insn_patch_text(&addr, &insn, 1); } void __kprobes arch_remove_kprobe(struct kprobe *p) { - if (p->ainsn.api.insn) { - free_insn_slot(p->ainsn.api.insn, 0); - p->ainsn.api.insn = NULL; + if (p->ainsn.xol_insn) { + free_insn_slot(p->ainsn.xol_insn, 0); + p->ainsn.xol_insn = NULL; } } @@ -212,9 +206,9 @@ static void __kprobes setup_singlestep(struct kprobe *p, } - if (p->ainsn.api.insn) { + if (p->ainsn.xol_insn) { /* prepare for single stepping */ - slot = (unsigned long)p->ainsn.api.insn; + slot = (unsigned long)p->ainsn.xol_insn; kprobes_save_local_irqflag(kcb, regs); instruction_pointer_set(regs, slot); @@ -252,8 +246,8 @@ static void __kprobes post_kprobe_handler(struct kprobe *cur, struct kprobe_ctlblk *kcb, struct pt_regs *regs) { /* return addr restore if non-branching insn */ - if (cur->ainsn.api.restore != 0) - instruction_pointer_set(regs, cur->ainsn.api.restore); + if (cur->ainsn.xol_restore != 0) + instruction_pointer_set(regs, cur->ainsn.xol_restore); /* restore back original saved kprobe variables and continue */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -355,7 +349,7 @@ kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr) struct kprobe *cur = kprobe_running(); if (cur && (kcb->kprobe_status & (KPROBE_HIT_SS | KPROBE_REENTER)) && - ((unsigned long)&cur->ainsn.api.insn[1] == addr)) { + ((unsigned long)&cur->ainsn.xol_insn[1] == addr)) { kprobes_restore_local_irqflag(kcb, regs); post_kprobe_handler(cur, kcb, regs); diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c index 22d0b3252476..4c6d2d712fbd 100644 --- a/arch/arm64/kernel/probes/simulate-insn.c +++ b/arch/arm64/kernel/probes/simulate-insn.c @@ -171,17 +171,15 @@ simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs) void __kprobes simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs) { - u64 *load_addr; + unsigned long load_addr; int xn = opcode & 0x1f; - int disp; - disp = ldr_displacement(opcode); - load_addr = (u64 *) (addr + disp); + load_addr = addr + ldr_displacement(opcode); if (opcode & (1 << 30)) /* x0-x30 */ - set_x_reg(regs, xn, *load_addr); + set_x_reg(regs, xn, READ_ONCE(*(u64 *)load_addr)); else /* w0-w30 */ - set_w_reg(regs, xn, *load_addr); + set_w_reg(regs, xn, READ_ONCE(*(u32 *)load_addr)); instruction_pointer_set(regs, instruction_pointer(regs) + 4); } @@ -189,14 +187,18 @@ simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs) void __kprobes simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs) { - s32 *load_addr; + unsigned long load_addr; int xn = opcode & 0x1f; - int disp; - disp = ldr_displacement(opcode); - load_addr = (s32 *) (addr + disp); + load_addr = addr + ldr_displacement(opcode); - set_x_reg(regs, xn, *load_addr); + set_x_reg(regs, xn, READ_ONCE(*(s32 *)load_addr)); instruction_pointer_set(regs, instruction_pointer(regs) + 4); } + +void __kprobes +simulate_nop(u32 opcode, long addr, struct pt_regs *regs) +{ + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); +} diff --git a/arch/arm64/kernel/probes/simulate-insn.h b/arch/arm64/kernel/probes/simulate-insn.h index e065dc92218e..efb2803ec943 100644 --- a/arch/arm64/kernel/probes/simulate-insn.h +++ b/arch/arm64/kernel/probes/simulate-insn.h @@ -16,5 +16,6 @@ void simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs); void simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs); void simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs); void simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs); +void simulate_nop(u32 opcode, long addr, struct pt_regs *regs); #endif /* _ARM_KERNEL_KPROBES_SIMULATE_INSN_H */ diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index d49aef2657cd..cb3d05af36e3 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -17,12 +17,20 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *xol_page_kaddr = kmap_atomic(page); void *dst = xol_page_kaddr + (vaddr & ~PAGE_MASK); + /* + * Initial cache maintenance of the xol page done via set_pte_at(). + * Subsequent CMOs only needed if the xol slot changes. + */ + if (!memcmp(dst, src, len)) + goto done; + /* Initialize the slot */ memcpy(dst, src, len); /* flush caches (dcache/icache) */ sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len); +done: kunmap_atomic(xol_page_kaddr); } @@ -34,7 +42,7 @@ unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) { - probe_opcode_t insn; + u32 insn; /* TODO: Currently we do not support AARCH32 instruction probing */ if (mm->context.flags & MMCF_AARCH32) @@ -42,7 +50,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) return -EINVAL; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); switch (arm_probe_decode_insn(insn, &auprobe->api)) { case INSN_REJECTED: @@ -102,13 +110,13 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t) bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { - probe_opcode_t insn; + u32 insn; unsigned long addr; if (!auprobe->simulate) return false; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); addr = instruction_pointer(regs); if (auprobe->api.handler) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 4ae31b7af6c3..42faebb7b712 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -43,11 +43,13 @@ #include <linux/stacktrace.h> #include <asm/alternative.h> +#include <asm/arch_timer.h> #include <asm/compat.h> #include <asm/cpufeature.h> #include <asm/cacheflush.h> #include <asm/exec.h> #include <asm/fpsimd.h> +#include <asm/gcs.h> #include <asm/mmu_context.h> #include <asm/mte.h> #include <asm/processor.h> @@ -226,7 +228,7 @@ void __show_regs(struct pt_regs *regs) printk("sp : %016llx\n", sp); if (system_uses_irq_prio_masking()) - printk("pmr_save: %08llx\n", regs->pmr_save); + printk("pmr: %08x\n", regs->pmr); i = top_reg; @@ -271,17 +273,73 @@ static void flush_tagged_addr_state(void) clear_thread_flag(TIF_TAGGED_ADDR); } +static void flush_poe(void) +{ + if (!system_supports_poe()) + return; + + write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0); +} + +#ifdef CONFIG_ARM64_GCS + +static void flush_gcs(void) +{ + if (!system_supports_gcs()) + return; + + gcs_free(current); + current->thread.gcs_el0_mode = 0; + write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1); + write_sysreg_s(0, SYS_GCSPR_EL0); +} + +static int copy_thread_gcs(struct task_struct *p, + const struct kernel_clone_args *args) +{ + unsigned long gcs; + + if (!system_supports_gcs()) + return 0; + + p->thread.gcs_base = 0; + p->thread.gcs_size = 0; + + gcs = gcs_alloc_thread_stack(p, args); + if (IS_ERR_VALUE(gcs)) + return PTR_ERR((void *)gcs); + + p->thread.gcs_el0_mode = current->thread.gcs_el0_mode; + p->thread.gcs_el0_locked = current->thread.gcs_el0_locked; + + return 0; +} + +#else + +static void flush_gcs(void) { } +static int copy_thread_gcs(struct task_struct *p, + const struct kernel_clone_args *args) +{ + return 0; +} + +#endif + void flush_thread(void) { fpsimd_flush_thread(); tls_thread_flush(); flush_ptrace_hw_breakpoint(current); flush_tagged_addr_state(); + flush_poe(); + flush_gcs(); } void arch_release_task_struct(struct task_struct *tsk) { fpsimd_release_task(tsk); + gcs_free(tsk); } int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) @@ -345,6 +403,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) unsigned long stack_start = args->stack; unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); + int ret; memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); @@ -371,6 +430,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (system_supports_tpidr2()) p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); + if (system_supports_poe()) + p->thread.por_el0 = read_sysreg_s(SYS_POR_EL0); + if (stack_start) { if (is_compat_thread(task_thread_info(p))) childregs->compat_sp = stack_start; @@ -386,6 +448,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.uw.tp_value = tls; p->thread.tpidr2_el0 = 0; } + + ret = copy_thread_gcs(p, args); + if (ret != 0) + return ret; } else { /* * A kthread has no context to ERET to, so ensure any buggy @@ -396,9 +462,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) */ memset(childregs, 0, sizeof(struct pt_regs)); childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT; + childregs->stackframe.type = FRAME_META_TYPE_FINAL; p->thread.cpu_context.x19 = (unsigned long)args->fn; p->thread.cpu_context.x20 = (unsigned long)args->fn_arg; + + if (system_supports_poe()) + p->thread.por_el0 = POR_EL0_INIT; } p->thread.cpu_context.pc = (unsigned long)ret_from_fork; p->thread.cpu_context.sp = (unsigned long)childregs; @@ -406,7 +476,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) * For the benefit of the unwinder, set up childregs->stackframe * as the final frame for the new task. */ - p->thread.cpu_context.fp = (unsigned long)childregs->stackframe; + p->thread.cpu_context.fp = (unsigned long)&childregs->stackframe; ptrace_hw_copy_thread(p); @@ -426,7 +496,7 @@ static void tls_thread_switch(struct task_struct *next) if (is_compat_thread(task_thread_info(next))) write_sysreg(next->thread.uw.tp_value, tpidrro_el0); - else if (!arm64_kernel_unmapped_at_el0()) + else write_sysreg(0, tpidrro_el0); write_sysreg(*task_user_tls(next), tpidr_el0); @@ -471,28 +541,104 @@ static void entry_task_switch(struct task_struct *next) __this_cpu_write(__entry_task, next); } +#ifdef CONFIG_ARM64_GCS + +void gcs_preserve_current_state(void) +{ + current->thread.gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0); +} + +static void gcs_thread_switch(struct task_struct *next) +{ + if (!system_supports_gcs()) + return; + + /* GCSPR_EL0 is always readable */ + gcs_preserve_current_state(); + write_sysreg_s(next->thread.gcspr_el0, SYS_GCSPR_EL0); + + if (current->thread.gcs_el0_mode != next->thread.gcs_el0_mode) + gcs_set_el0_mode(next); + + /* + * Ensure that GCS memory effects of the 'prev' thread are + * ordered before other memory accesses with release semantics + * (or preceded by a DMB) on the current PE. In addition, any + * memory accesses with acquire semantics (or succeeded by a + * DMB) are ordered before GCS memory effects of the 'next' + * thread. This will ensure that the GCS memory effects are + * visible to other PEs in case of migration. + */ + if (task_gcs_el0_enabled(current) || task_gcs_el0_enabled(next)) + gcsb_dsync(); +} + +#else + +static void gcs_thread_switch(struct task_struct *next) +{ +} + +#endif + /* - * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT. - * Ensure access is disabled when switching to a 32bit task, ensure - * access is enabled when switching to a 64bit task. + * Handle sysreg updates for ARM erratum 1418040 which affects the 32bit view of + * CNTVCT, various other errata which require trapping all CNTVCT{,_EL0} + * accesses and prctl(PR_SET_TSC). Ensure access is disabled iff a workaround is + * required or PR_TSC_SIGSEGV is set. */ -static void erratum_1418040_thread_switch(struct task_struct *next) +static void update_cntkctl_el1(struct task_struct *next) { - if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) || - !this_cpu_has_cap(ARM64_WORKAROUND_1418040)) - return; + struct thread_info *ti = task_thread_info(next); - if (is_compat_thread(task_thread_info(next))) + if (test_ti_thread_flag(ti, TIF_TSC_SIGSEGV) || + has_erratum_handler(read_cntvct_el0) || + (IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) && + this_cpu_has_cap(ARM64_WORKAROUND_1418040) && + is_compat_thread(ti))) sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0); else sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN); } -static void erratum_1418040_new_exec(void) +static void cntkctl_thread_switch(struct task_struct *prev, + struct task_struct *next) +{ + if ((read_ti_thread_flags(task_thread_info(prev)) & + (_TIF_32BIT | _TIF_TSC_SIGSEGV)) != + (read_ti_thread_flags(task_thread_info(next)) & + (_TIF_32BIT | _TIF_TSC_SIGSEGV))) + update_cntkctl_el1(next); +} + +static int do_set_tsc_mode(unsigned int val) { + bool tsc_sigsegv; + + if (val == PR_TSC_SIGSEGV) + tsc_sigsegv = true; + else if (val == PR_TSC_ENABLE) + tsc_sigsegv = false; + else + return -EINVAL; + preempt_disable(); - erratum_1418040_thread_switch(current); + update_thread_flag(TIF_TSC_SIGSEGV, tsc_sigsegv); + update_cntkctl_el1(current); preempt_enable(); + + return 0; +} + +static void permission_overlay_switch(struct task_struct *next) +{ + if (!system_supports_poe()) + return; + + current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0); + if (current->thread.por_el0 != next->thread.por_el0) { + write_sysreg_s(next->thread.por_el0, SYS_POR_EL0); + } } /* @@ -528,8 +674,10 @@ struct task_struct *__switch_to(struct task_struct *prev, contextidr_thread_switch(next); entry_task_switch(next); ssbs_thread_switch(next); - erratum_1418040_thread_switch(next); + cntkctl_thread_switch(prev, next); ptrauth_thread_switch_user(next); + permission_overlay_switch(next); + gcs_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case @@ -645,7 +793,7 @@ void arch_setup_new_exec(void) current->mm->context.flags = mmflags; ptrauth_thread_init_user(); mte_thread_init_user(); - erratum_1418040_new_exec(); + do_set_tsc_mode(PR_TSC_ENABLE); if (task_spec_ssb_noexec(current)) { arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS, @@ -711,7 +859,7 @@ long get_tagged_addr_ctrl(struct task_struct *task) * disable it for tasks that already opted in to the relaxed ABI. */ -static struct ctl_table tagged_addr_sysctl_table[] = { +static const struct ctl_table tagged_addr_sysctl_table[] = { { .procname = "tagged_addr_disabled", .mode = 0644, @@ -754,3 +902,26 @@ int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state, return prot; } #endif + +int get_tsc_mode(unsigned long adr) +{ + unsigned int val; + + if (is_compat_task()) + return -EINVAL; + + if (test_thread_flag(TIF_TSC_SIGSEGV)) + val = PR_TSC_SIGSEGV; + else + val = PR_TSC_ENABLE; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_tsc_mode(unsigned int val) +{ + if (is_compat_task()) + return -EINVAL; + + return do_set_tsc_mode(val); +} diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 6268a13a1d58..da53722f95d4 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -558,6 +558,18 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void) /* SCTLR_EL1.DSSBS was initialised to 0 during boot */ set_pstate_ssbs(0); + + /* + * SSBS is self-synchronizing and is intended to affect subsequent + * speculative instructions, but some CPUs can speculate with a stale + * value of SSBS. + * + * Mitigate this with an unconditional speculation barrier, as CPUs + * could mis-speculate branches and bypass a conditional barrier. + */ + if (IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386)) + spec_bar(); + return SPECTRE_MITIGATED; } diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 29a8e444db83..fabd732d0a2d 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -40,7 +40,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu) { phys_addr_t pa_secondary_entry = __pa_symbol(secondary_entry); int err = psci_ops.cpu_on(cpu_logical_map(cpu), pa_secondary_entry); - if (err) + if (err && err != -EPERM) pr_err("failed to boot CPU%d (%d)\n", cpu, err); return err; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 0d022599eb61..f79b0d5f71ac 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -34,6 +34,7 @@ #include <asm/cpufeature.h> #include <asm/debug-monitors.h> #include <asm/fpsimd.h> +#include <asm/gcs.h> #include <asm/mte.h> #include <asm/pointer_auth.h> #include <asm/stacktrace.h> @@ -719,6 +720,8 @@ static int fpmr_set(struct task_struct *target, const struct user_regset *regset if (!system_supports_fpmr()) return -EINVAL; + fpmr = target->thread.uw.fpmr; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpmr, 0, count); if (ret) return ret; @@ -898,7 +901,11 @@ static int sve_set_common(struct task_struct *target, if (ret) goto out; - /* Actual VL set may be less than the user asked for: */ + /* + * Actual VL set may be different from what the user asked + * for, or we may have configured the _ONEXEC VL not the + * current VL: + */ vq = sve_vq_from_vl(task_get_vl(target, type)); /* Enter/exit streaming mode */ @@ -1125,7 +1132,11 @@ static int za_set(struct task_struct *target, if (ret) goto out; - /* Actual VL set may be less than the user asked for: */ + /* + * Actual VL set may be different from what the user asked + * for, or we may have configured the _ONEXEC rather than + * current VL: + */ vq = sve_vq_from_vl(task_get_sme_vl(target)); /* Ensure there is some SVE storage for streaming mode */ @@ -1418,7 +1429,7 @@ static int tagged_addr_ctrl_get(struct task_struct *target, { long ctrl = get_tagged_addr_ctrl(target); - if (IS_ERR_VALUE(ctrl)) + if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl))) return ctrl; return membuf_write(&to, &ctrl, sizeof(ctrl)); @@ -1432,6 +1443,10 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct int ret; long ctrl; + ctrl = get_tagged_addr_ctrl(target); + if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl))) + return ctrl; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); if (ret) return ret; @@ -1440,6 +1455,101 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct } #endif +#ifdef CONFIG_ARM64_POE +static int poe_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + if (!system_supports_poe()) + return -EINVAL; + + return membuf_write(&to, &target->thread.por_el0, + sizeof(target->thread.por_el0)); +} + +static int poe_set(struct task_struct *target, const struct + user_regset *regset, unsigned int pos, + unsigned int count, const void *kbuf, const + void __user *ubuf) +{ + int ret; + long ctrl; + + if (!system_supports_poe()) + return -EINVAL; + + ctrl = target->thread.por_el0; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); + if (ret) + return ret; + + target->thread.por_el0 = ctrl; + + return 0; +} +#endif + +#ifdef CONFIG_ARM64_GCS +static void task_gcs_to_user(struct user_gcs *user_gcs, + const struct task_struct *target) +{ + user_gcs->features_enabled = target->thread.gcs_el0_mode; + user_gcs->features_locked = target->thread.gcs_el0_locked; + user_gcs->gcspr_el0 = target->thread.gcspr_el0; +} + +static void task_gcs_from_user(struct task_struct *target, + const struct user_gcs *user_gcs) +{ + target->thread.gcs_el0_mode = user_gcs->features_enabled; + target->thread.gcs_el0_locked = user_gcs->features_locked; + target->thread.gcspr_el0 = user_gcs->gcspr_el0; +} + +static int gcs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct user_gcs user_gcs; + + if (!system_supports_gcs()) + return -EINVAL; + + if (target == current) + gcs_preserve_current_state(); + + task_gcs_to_user(&user_gcs, target); + + return membuf_write(&to, &user_gcs, sizeof(user_gcs)); +} + +static int gcs_set(struct task_struct *target, const struct + user_regset *regset, unsigned int pos, + unsigned int count, const void *kbuf, const + void __user *ubuf) +{ + int ret; + struct user_gcs user_gcs; + + if (!system_supports_gcs()) + return -EINVAL; + + task_gcs_to_user(&user_gcs, target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_gcs, 0, -1); + if (ret) + return ret; + + if (user_gcs.features_enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) + return -EINVAL; + + task_gcs_from_user(target, &user_gcs); + + return 0; +} +#endif + enum aarch64_regset { REGSET_GPR, REGSET_FPR, @@ -1469,6 +1579,12 @@ enum aarch64_regset { #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI REGSET_TAGGED_ADDR_CTRL, #endif +#ifdef CONFIG_ARM64_POE + REGSET_POE, +#endif +#ifdef CONFIG_ARM64_GCS + REGSET_GCS, +#endif }; static const struct user_regset aarch64_regsets[] = { @@ -1628,6 +1744,26 @@ static const struct user_regset aarch64_regsets[] = { .set = tagged_addr_ctrl_set, }, #endif +#ifdef CONFIG_ARM64_POE + [REGSET_POE] = { + .core_note_type = NT_ARM_POE, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .regset_get = poe_get, + .set = poe_set, + }, +#endif +#ifdef CONFIG_ARM64_GCS + [REGSET_GCS] = { + .core_note_type = NT_ARM_GCS, + .n = sizeof(struct user_gcs) / sizeof(u64), + .size = sizeof(u64), + .align = sizeof(u64), + .regset_get = gcs_get, + .set = gcs_set, + }, +#endif }; static const struct user_regset_view user_aarch64_view = { diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c index 99f2ffe9fc05..5b0891146054 100644 --- a/arch/arm64/kernel/reloc_test_core.c +++ b/arch/arm64/kernel/reloc_test_core.c @@ -74,4 +74,5 @@ static void __exit reloc_test_exit(void) module_init(reloc_test_init); module_exit(reloc_test_exit); +MODULE_DESCRIPTION("Relocation testing module"); MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c new file mode 100644 index 000000000000..ce4778141ec7 --- /dev/null +++ b/arch/arm64/kernel/rsi.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Ltd. + */ + +#include <linux/jump_label.h> +#include <linux/memblock.h> +#include <linux/psci.h> +#include <linux/swiotlb.h> +#include <linux/cc_platform.h> +#include <linux/platform_device.h> + +#include <asm/io.h> +#include <asm/mem_encrypt.h> +#include <asm/rsi.h> + +static struct realm_config config; + +unsigned long prot_ns_shared; +EXPORT_SYMBOL(prot_ns_shared); + +DEFINE_STATIC_KEY_FALSE_RO(rsi_present); +EXPORT_SYMBOL(rsi_present); + +bool cc_platform_has(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_MEM_ENCRYPT: + return is_realm_world(); + default: + return false; + } +} +EXPORT_SYMBOL_GPL(cc_platform_has); + +static bool rsi_version_matches(void) +{ + unsigned long ver_lower, ver_higher; + unsigned long ret = rsi_request_version(RSI_ABI_VERSION, + &ver_lower, + &ver_higher); + + if (ret == SMCCC_RET_NOT_SUPPORTED) + return false; + + if (ret != RSI_SUCCESS) { + pr_err("RME: RMM doesn't support RSI version %lu.%lu. Supported range: %lu.%lu-%lu.%lu\n", + RSI_ABI_VERSION_MAJOR, RSI_ABI_VERSION_MINOR, + RSI_ABI_VERSION_GET_MAJOR(ver_lower), + RSI_ABI_VERSION_GET_MINOR(ver_lower), + RSI_ABI_VERSION_GET_MAJOR(ver_higher), + RSI_ABI_VERSION_GET_MINOR(ver_higher)); + return false; + } + + pr_info("RME: Using RSI version %lu.%lu\n", + RSI_ABI_VERSION_GET_MAJOR(ver_lower), + RSI_ABI_VERSION_GET_MINOR(ver_lower)); + + return true; +} + +static void __init arm64_rsi_setup_memory(void) +{ + u64 i; + phys_addr_t start, end; + + /* + * Iterate over the available memory ranges and convert the state to + * protected memory. We should take extra care to ensure that we DO NOT + * permit any "DESTROYED" pages to be converted to "RAM". + * + * panic() is used because if the attempt to switch the memory to + * protected has failed here, then future accesses to the memory are + * simply going to be reflected as a SEA (Synchronous External Abort) + * which we can't handle. Bailing out early prevents the guest limping + * on and dying later. + */ + for_each_mem_range(i, &start, &end) { + if (rsi_set_memory_range_protected_safe(start, end)) { + panic("Failed to set memory range to protected: %pa-%pa", + &start, &end); + } + } +} + +bool __arm64_is_protected_mmio(phys_addr_t base, size_t size) +{ + enum ripas ripas; + phys_addr_t end, top; + + /* Overflow ? */ + if (WARN_ON(base + size <= base)) + return false; + + end = ALIGN(base + size, RSI_GRANULE_SIZE); + base = ALIGN_DOWN(base, RSI_GRANULE_SIZE); + + while (base < end) { + if (WARN_ON(rsi_ipa_state_get(base, end, &ripas, &top))) + break; + if (WARN_ON(top <= base)) + break; + if (ripas != RSI_RIPAS_DEV) + break; + base = top; + } + + return base >= end; +} +EXPORT_SYMBOL(__arm64_is_protected_mmio); + +static int realm_ioremap_hook(phys_addr_t phys, size_t size, pgprot_t *prot) +{ + if (__arm64_is_protected_mmio(phys, size)) + *prot = pgprot_encrypted(*prot); + else + *prot = pgprot_decrypted(*prot); + + return 0; +} + +void __init arm64_rsi_init(void) +{ + if (arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_SMC) + return; + if (!rsi_version_matches()) + return; + if (WARN_ON(rsi_get_realm_config(&config))) + return; + prot_ns_shared = BIT(config.ipa_bits - 1); + + if (arm64_ioremap_prot_hook_register(realm_ioremap_hook)) + return; + + if (realm_register_memory_enc_ops()) + return; + + arm64_rsi_setup_memory(); + + static_branch_enable(&rsi_present); +} + +static struct platform_device rsi_dev = { + .name = RSI_PDEV_NAME, + .id = PLATFORM_DEVID_NONE +}; + +static int __init arm64_create_dummy_rsi_dev(void) +{ + if (is_realm_world() && + platform_device_register(&rsi_dev)) + pr_err("failed to register rsi platform device\n"); + return 0; +} + +arch_initcall(arm64_create_dummy_rsi_dev) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 65a052bf741f..85104587f849 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -43,6 +43,7 @@ #include <asm/cpu_ops.h> #include <asm/kasan.h> #include <asm/numa.h> +#include <asm/rsi.h> #include <asm/scs.h> #include <asm/sections.h> #include <asm/setup.h> @@ -175,7 +176,11 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) if (dt_virt) memblock_reserve(dt_phys, size); - if (!dt_virt || !early_init_dt_scan(dt_virt)) { + /* + * dt_virt is a fixmap address, hence __pa(dt_virt) can't be used. + * Pass dt_phys directly. + */ + if (!early_init_dt_scan(dt_virt, dt_phys)) { pr_crit("\n" "Error: invalid device tree blob at physical address %pa (virtual address 0x%px)\n" "The dtb must be 8-byte aligned and must not exceed 2 MB in size\n" @@ -218,9 +223,7 @@ static void __init request_standard_resources(void) num_standard_resources = memblock.memory.cnt; res_size = num_standard_resources * sizeof(*standard_resources); - standard_resources = memblock_alloc(res_size, SMP_CACHE_BYTES); - if (!standard_resources) - panic("%s: Failed to allocate %zu bytes\n", __func__, res_size); + standard_resources = memblock_alloc_or_panic(res_size, SMP_CACHE_BYTES); for_each_mem_region(region) { res = &standard_resources[i++]; @@ -298,8 +301,15 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) dynamic_scs_init(); /* - * Unmask SError as soon as possible after initializing earlycon so - * that we can report any SErrors immediately. + * The primary CPU enters the kernel with all DAIF exceptions masked. + * + * We must unmask Debug and SError before preemption or scheduling is + * possible to ensure that these are consistently unmasked across + * threads, and we want to unmask SError as soon as possible after + * initializing earlycon so that we can report any SErrors immediately. + * + * IRQ and FIQ will be unmasked after the root irqchip has been + * detected and initialized. */ local_daif_restore(DAIF_PROCCTX_NOIRQ); @@ -344,13 +354,12 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) else psci_acpi_init(); + arm64_rsi_init(); + init_bootcpu_ops(); smp_init_cpus(); smp_build_mpidr_hash(); - /* Init percpu seeds for random tags after cpus are set up. */ - kasan_init_sw_tags(); - #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Make sure init_thread_info.ttbr0 always generates translation diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 4a77f4976e11..99ea26d400ff 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -19,12 +19,14 @@ #include <linux/ratelimit.h> #include <linux/rseq.h> #include <linux/syscalls.h> +#include <linux/pkeys.h> #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/elf.h> #include <asm/exception.h> #include <asm/cacheflush.h> +#include <asm/gcs.h> #include <asm/ucontext.h> #include <asm/unistd.h> #include <asm/fpsimd.h> @@ -34,6 +36,8 @@ #include <asm/traps.h> #include <asm/vdso.h> +#define GCS_SIGNAL_CAP(addr) (((unsigned long)addr) & GCS_CAP_ADDR_MASK) + /* * Do a signal return; undo the signal stack. These are aligned to 128-bit. */ @@ -42,11 +46,6 @@ struct rt_sigframe { struct ucontext uc; }; -struct frame_record { - u64 fp; - u64 lr; -}; - struct rt_sigframe_user_layout { struct rt_sigframe __user *sigframe; struct frame_record __user *next_frame; @@ -56,19 +55,73 @@ struct rt_sigframe_user_layout { unsigned long fpsimd_offset; unsigned long esr_offset; + unsigned long gcs_offset; unsigned long sve_offset; unsigned long tpidr2_offset; unsigned long za_offset; unsigned long zt_offset; unsigned long fpmr_offset; + unsigned long poe_offset; unsigned long extra_offset; unsigned long end_offset; }; -#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16) +/* + * Holds any EL0-controlled state that influences unprivileged memory accesses. + * This includes both accesses done in userspace and uaccess done in the kernel. + * + * This state needs to be carefully managed to ensure that it doesn't cause + * uaccess to fail when setting up the signal frame, and the signal handler + * itself also expects a well-defined state when entered. + */ +struct user_access_state { + u64 por_el0; +}; + #define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16) #define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16) +/* + * Save the user access state into ua_state and reset it to disable any + * restrictions. + */ +static void save_reset_user_access_state(struct user_access_state *ua_state) +{ + if (system_supports_poe()) { + u64 por_enable_all = 0; + + for (int pkey = 0; pkey < arch_max_pkey(); pkey++) + por_enable_all |= POE_RXW << (pkey * POR_BITS_PER_PKEY); + + ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0); + write_sysreg_s(por_enable_all, SYS_POR_EL0); + /* Ensure that any subsequent uaccess observes the updated value */ + isb(); + } +} + +/* + * Set the user access state for invoking the signal handler. + * + * No uaccess should be done after that function is called. + */ +static void set_handler_user_access_state(void) +{ + if (system_supports_poe()) + write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0); +} + +/* + * Restore the user access state to the values saved in ua_state. + * + * No uaccess should be done after that function is called. + */ +static void restore_user_access_state(const struct user_access_state *ua_state) +{ + if (system_supports_poe()) + write_sysreg_s(ua_state->por_el0, SYS_POR_EL0); +} + static void init_user_layout(struct rt_sigframe_user_layout *user) { const size_t reserved_size = @@ -185,6 +238,10 @@ struct user_ctxs { u32 zt_size; struct fpmr_context __user *fpmr; u32 fpmr_size; + struct poe_context __user *poe; + u32 poe_size; + struct gcs_context __user *gcs; + u32 gcs_size; }; static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) @@ -258,6 +315,34 @@ static int restore_fpmr_context(struct user_ctxs *user) return err; } +static int preserve_poe_context(struct poe_context __user *ctx, + const struct user_access_state *ua_state) +{ + int err = 0; + + __put_user_error(POE_MAGIC, &ctx->head.magic, err); + __put_user_error(sizeof(*ctx), &ctx->head.size, err); + __put_user_error(ua_state->por_el0, &ctx->por_el0, err); + + return err; +} + +static int restore_poe_context(struct user_ctxs *user, + struct user_access_state *ua_state) +{ + u64 por_el0; + int err = 0; + + if (user->poe_size != sizeof(*user->poe)) + return -EINVAL; + + __get_user_error(por_el0, &(user->poe->por_el0), err); + if (!err) + ua_state->por_el0 = por_el0; + + return err; +} + #ifdef CONFIG_ARM64_SVE static int preserve_sve_context(struct sve_context __user *ctx) @@ -604,6 +689,82 @@ extern int restore_zt_context(struct user_ctxs *user); #endif /* ! CONFIG_ARM64_SME */ +#ifdef CONFIG_ARM64_GCS + +static int preserve_gcs_context(struct gcs_context __user *ctx) +{ + int err = 0; + u64 gcspr = read_sysreg_s(SYS_GCSPR_EL0); + + /* + * If GCS is enabled we will add a cap token to the frame, + * include it in the GCSPR_EL0 we report to support stack + * switching via sigreturn if GCS is enabled. We do not allow + * enabling via sigreturn so the token is only relevant for + * threads with GCS enabled. + */ + if (task_gcs_el0_enabled(current)) + gcspr -= 8; + + __put_user_error(GCS_MAGIC, &ctx->head.magic, err); + __put_user_error(sizeof(*ctx), &ctx->head.size, err); + __put_user_error(gcspr, &ctx->gcspr, err); + __put_user_error(0, &ctx->reserved, err); + __put_user_error(current->thread.gcs_el0_mode, + &ctx->features_enabled, err); + + return err; +} + +static int restore_gcs_context(struct user_ctxs *user) +{ + u64 gcspr, enabled; + int err = 0; + + if (user->gcs_size != sizeof(*user->gcs)) + return -EINVAL; + + __get_user_error(gcspr, &user->gcs->gcspr, err); + __get_user_error(enabled, &user->gcs->features_enabled, err); + if (err) + return err; + + /* Don't allow unknown modes */ + if (enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) + return -EINVAL; + + err = gcs_check_locked(current, enabled); + if (err != 0) + return err; + + /* Don't allow enabling */ + if (!task_gcs_el0_enabled(current) && + (enabled & PR_SHADOW_STACK_ENABLE)) + return -EINVAL; + + /* If we are disabling disable everything */ + if (!(enabled & PR_SHADOW_STACK_ENABLE)) + enabled = 0; + + current->thread.gcs_el0_mode = enabled; + + /* + * We let userspace set GCSPR_EL0 to anything here, we will + * validate later in gcs_restore_signal(). + */ + write_sysreg_s(gcspr, SYS_GCSPR_EL0); + + return 0; +} + +#else /* ! CONFIG_ARM64_GCS */ + +/* Turn any non-optimised out attempts to use these into a link error: */ +extern int preserve_gcs_context(void __user *ctx); +extern int restore_gcs_context(struct user_ctxs *user); + +#endif /* ! CONFIG_ARM64_GCS */ + static int parse_user_sigframe(struct user_ctxs *user, struct rt_sigframe __user *sf) { @@ -621,6 +782,8 @@ static int parse_user_sigframe(struct user_ctxs *user, user->za = NULL; user->zt = NULL; user->fpmr = NULL; + user->poe = NULL; + user->gcs = NULL; if (!IS_ALIGNED((unsigned long)base, 16)) goto invalid; @@ -671,6 +834,17 @@ static int parse_user_sigframe(struct user_ctxs *user, /* ignore */ break; + case POE_MAGIC: + if (!system_supports_poe()) + goto invalid; + + if (user->poe) + goto invalid; + + user->poe = (struct poe_context __user *)head; + user->poe_size = size; + break; + case SVE_MAGIC: if (!system_supports_sve() && !system_supports_sme()) goto invalid; @@ -726,6 +900,17 @@ static int parse_user_sigframe(struct user_ctxs *user, user->fpmr_size = size; break; + case GCS_MAGIC: + if (!system_supports_gcs()) + goto invalid; + + if (user->gcs) + goto invalid; + + user->gcs = (struct gcs_context __user *)head; + user->gcs_size = size; + break; + case EXTRA_MAGIC: if (have_extra_context) goto invalid; @@ -809,7 +994,8 @@ invalid: } static int restore_sigframe(struct pt_regs *regs, - struct rt_sigframe __user *sf) + struct rt_sigframe __user *sf, + struct user_access_state *ua_state) { sigset_t set; int i, err; @@ -845,6 +1031,9 @@ static int restore_sigframe(struct pt_regs *regs, err = restore_fpsimd_context(&user); } + if (err == 0 && system_supports_gcs() && user.gcs) + err = restore_gcs_context(&user); + if (err == 0 && system_supports_tpidr2() && user.tpidr2) err = restore_tpidr2_context(&user); @@ -857,13 +1046,69 @@ static int restore_sigframe(struct pt_regs *regs, if (err == 0 && system_supports_sme2() && user.zt) err = restore_zt_context(&user); + if (err == 0 && system_supports_poe() && user.poe) + err = restore_poe_context(&user, ua_state); + return err; } +#ifdef CONFIG_ARM64_GCS +static int gcs_restore_signal(void) +{ + u64 gcspr_el0, cap; + int ret; + + if (!system_supports_gcs()) + return 0; + + if (!(current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE)) + return 0; + + gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0); + + /* + * Ensure that any changes to the GCS done via GCS operations + * are visible to the normal reads we do to validate the + * token. + */ + gcsb_dsync(); + + /* + * GCSPR_EL0 should be pointing at a capped GCS, read the cap. + * We don't enforce that this is in a GCS page, if it is not + * then faults will be generated on GCS operations - the main + * concern is to protect GCS pages. + */ + ret = copy_from_user(&cap, (unsigned long __user *)gcspr_el0, + sizeof(cap)); + if (ret) + return -EFAULT; + + /* + * Check that the cap is the actual GCS before replacing it. + */ + if (cap != GCS_SIGNAL_CAP(gcspr_el0)) + return -EINVAL; + + /* Invalidate the token to prevent reuse */ + put_user_gcs(0, (unsigned long __user *)gcspr_el0, &ret); + if (ret != 0) + return -EFAULT; + + write_sysreg_s(gcspr_el0 + 8, SYS_GCSPR_EL0); + + return 0; +} + +#else +static int gcs_restore_signal(void) { return 0; } +#endif + SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; + struct user_access_state ua_state; /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; @@ -880,12 +1125,17 @@ SYSCALL_DEFINE0(rt_sigreturn) if (!access_ok(frame, sizeof (*frame))) goto badframe; - if (restore_sigframe(regs, frame)) + if (restore_sigframe(regs, frame, &ua_state)) + goto badframe; + + if (gcs_restore_signal()) goto badframe; if (restore_altstack(&frame->uc.uc_stack)) goto badframe; + restore_user_access_state(&ua_state); + return regs->regs[0]; badframe: @@ -920,6 +1170,15 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, return err; } +#ifdef CONFIG_ARM64_GCS + if (system_supports_gcs() && (add_all || current->thread.gcspr_el0)) { + err = sigframe_alloc(user, &user->gcs_offset, + sizeof(struct gcs_context)); + if (err) + return err; + } +#endif + if (system_supports_sve() || system_supports_sme()) { unsigned int vq = 0; @@ -980,11 +1239,19 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, return err; } + if (system_supports_poe()) { + err = sigframe_alloc(user, &user->poe_offset, + sizeof(struct poe_context)); + if (err) + return err; + } + return sigframe_alloc_end(user); } static int setup_sigframe(struct rt_sigframe_user_layout *user, - struct pt_regs *regs, sigset_t *set) + struct pt_regs *regs, sigset_t *set, + const struct user_access_state *ua_state) { int i, err = 0; struct rt_sigframe __user *sf = user->sigframe; @@ -1020,6 +1287,12 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, __put_user_error(current->thread.fault_code, &esr_ctx->esr, err); } + if (system_supports_gcs() && err == 0 && user->gcs_offset) { + struct gcs_context __user *gcs_ctx = + apply_user_offset(user, user->gcs_offset); + err |= preserve_gcs_context(gcs_ctx); + } + /* Scalable Vector Extension state (including streaming), if present */ if ((system_supports_sve() || system_supports_sme()) && err == 0 && user->sve_offset) { @@ -1042,6 +1315,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, err |= preserve_fpmr_context(fpmr_ctx); } + if (system_supports_poe() && err == 0) { + struct poe_context __user *poe_ctx = + apply_user_offset(user, user->poe_offset); + + err |= preserve_poe_context(poe_ctx, ua_state); + } + /* ZA state if present */ if (system_supports_sme() && err == 0 && user->za_offset) { struct za_context __user *za_ctx = @@ -1130,15 +1410,81 @@ static int get_sigframe(struct rt_sigframe_user_layout *user, return 0; } -static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, +#ifdef CONFIG_ARM64_GCS + +static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig) +{ + u64 gcspr_el0; + int ret = 0; + + if (!system_supports_gcs()) + return 0; + + if (!task_gcs_el0_enabled(current)) + return 0; + + /* + * We are entering a signal handler, current register state is + * active. + */ + gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0); + + /* + * Push a cap and the GCS entry for the trampoline onto the GCS. + */ + put_user_gcs((unsigned long)sigtramp, + (unsigned long __user *)(gcspr_el0 - 16), &ret); + put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 8), + (unsigned long __user *)(gcspr_el0 - 8), &ret); + if (ret != 0) + return ret; + + gcspr_el0 -= 16; + write_sysreg_s(gcspr_el0, SYS_GCSPR_EL0); + + return 0; +} +#else + +static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig) +{ + return 0; +} + +#endif + +static int setup_return(struct pt_regs *regs, struct ksignal *ksig, struct rt_sigframe_user_layout *user, int usig) { __sigrestore_t sigtramp; + int err; + + if (ksig->ka.sa.sa_flags & SA_RESTORER) + sigtramp = ksig->ka.sa.sa_restorer; + else + sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp); + + err = gcs_signal_entry(sigtramp, ksig); + if (err) + return err; + + /* + * We must not fail from this point onwards. We are going to update + * registers, including SP, in order to invoke the signal handler. If + * we failed and attempted to deliver a nested SIGSEGV to a handler + * after that point, the subsequent sigreturn would end up restoring + * the (partial) state for the original signal handler. + */ regs->regs[0] = usig; + if (ksig->ka.sa.sa_flags & SA_SIGINFO) { + regs->regs[1] = (unsigned long)&user->sigframe->info; + regs->regs[2] = (unsigned long)&user->sigframe->uc; + } regs->sp = (unsigned long)user->sigframe; regs->regs[29] = (unsigned long)&user->next_frame->fp; - regs->pc = (unsigned long)ka->sa.sa_handler; + regs->regs[30] = (unsigned long)sigtramp; + regs->pc = (unsigned long)ksig->ka.sa.sa_handler; /* * Signal delivery is a (wacky) indirect function call in @@ -1178,12 +1524,7 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, sme_smstop(); } - if (ka->sa.sa_flags & SA_RESTORER) - sigtramp = ka->sa.sa_restorer; - else - sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp); - - regs->regs[30] = (unsigned long)sigtramp; + return 0; } static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, @@ -1191,6 +1532,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, { struct rt_sigframe_user_layout user; struct rt_sigframe __user *frame; + struct user_access_state ua_state; int err = 0; fpsimd_signal_preserve_current_state(); @@ -1198,21 +1540,29 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, if (get_sigframe(&user, ksig, regs)) return 1; + save_reset_user_access_state(&ua_state); frame = user.sigframe; __put_user_error(0, &frame->uc.uc_flags, err); __put_user_error(NULL, &frame->uc.uc_link, err); err |= __save_altstack(&frame->uc.uc_stack, regs->sp); - err |= setup_sigframe(&user, regs, set); - if (err == 0) { - setup_return(regs, &ksig->ka, &user, usig); - if (ksig->ka.sa.sa_flags & SA_SIGINFO) { - err |= copy_siginfo_to_user(&frame->info, &ksig->info); - regs->regs[1] = (unsigned long)&frame->info; - regs->regs[2] = (unsigned long)&frame->uc; - } - } + err |= setup_sigframe(&user, regs, set, &ua_state); + if (ksig->ka.sa.sa_flags & SA_SIGINFO) + err |= copy_siginfo_to_user(&frame->info, &ksig->info); + + if (err == 0) + err = setup_return(regs, ksig, &user, usig); + + /* + * We must not fail if setup_return() succeeded - see comment at the + * beginning of setup_return(). + */ + + if (err == 0) + set_handler_user_access_state(); + else + restore_user_access_state(&ua_state); return err; } diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index bbd542704730..81e798b6dada 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -17,7 +17,7 @@ #include <asm/signal32.h> #include <asm/traps.h> #include <linux/uaccess.h> -#include <asm/unistd.h> +#include <asm/unistd_compat_32.h> #include <asm/vdso.h> struct compat_vfp_sigframe { @@ -451,7 +451,7 @@ int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set, void compat_setup_restart_syscall(struct pt_regs *regs) { - regs->regs[7] = __NR_compat_restart_syscall; + regs->regs[7] = __NR_compat32_restart_syscall; } /* diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S index ccbd4aab4ba4..6f486b95b413 100644 --- a/arch/arm64/kernel/sigreturn32.S +++ b/arch/arm64/kernel/sigreturn32.S @@ -13,7 +13,7 @@ * need two 16-bit instructions. */ -#include <asm/unistd.h> +#include <asm/unistd_compat_32.h> .section .rodata .globl __aarch32_sigret_code_start @@ -22,26 +22,26 @@ __aarch32_sigret_code_start: /* * ARM Code */ - .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn - .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn + .byte __NR_compat32_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat32_sigreturn + .byte __NR_compat32_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat32_sigreturn /* * Thumb code */ - .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn - .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn + .byte __NR_compat32_sigreturn, 0x27 // svc #__NR_compat32_sigreturn + .byte __NR_compat32_sigreturn, 0xdf // mov r7, #__NR_compat32_sigreturn /* * ARM code */ - .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn - .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn + .byte __NR_compat32_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat32_rt_sigreturn + .byte __NR_compat32_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat32_rt_sigreturn /* * Thumb code */ - .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn - .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn + .byte __NR_compat32_rt_sigreturn, 0x27 // svc #__NR_compat32_rt_sigreturn + .byte __NR_compat32_rt_sigreturn, 0xdf // mov r7, #__NR_compat32_rt_sigreturn .globl __aarch32_sigret_code_end __aarch32_sigret_code_end: diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S index 487381164ff6..2def9d0dd3dd 100644 --- a/arch/arm64/kernel/smccc-call.S +++ b/arch/arm64/kernel/smccc-call.S @@ -7,48 +7,19 @@ #include <asm/asm-offsets.h> #include <asm/assembler.h> -#include <asm/thread_info.h> - -/* - * If we have SMCCC v1.3 and (as is likely) no SVE state in - * the registers then set the SMCCC hint bit to say there's no - * need to preserve it. Do this by directly adjusting the SMCCC - * function value which is already stored in x0 ready to be called. - */ -SYM_FUNC_START(__arm_smccc_sve_check) - - ldr_l x16, smccc_has_sve_hint - cbz x16, 2f - - get_current_task x16 - ldr x16, [x16, #TSK_TI_FLAGS] - tbnz x16, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state? - tbnz x16, #TIF_SVE, 2f // Does that state include SVE? - -1: orr x0, x0, ARM_SMCCC_1_3_SVE_HINT - -2: ret -SYM_FUNC_END(__arm_smccc_sve_check) -EXPORT_SYMBOL(__arm_smccc_sve_check) .macro SMCCC instr - stp x29, x30, [sp, #-16]! - mov x29, sp -alternative_if ARM64_SVE - bl __arm_smccc_sve_check -alternative_else_nop_endif \instr #0 - ldr x4, [sp, #16] + ldr x4, [sp] stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS] stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS] - ldr x4, [sp, #24] + ldr x4, [sp, #8] cbz x4, 1f /* no quirk structure */ ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS] cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6 b.ne 1f str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS] -1: ldp x29, x30, [sp], #16 - ret +1: ret .endm /* diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 4ced34f62dab..3b3f6b56e733 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -55,9 +55,6 @@ #include <trace/events/ipi.h> -DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); -EXPORT_PER_CPU_SYMBOL(cpu_number); - /* * as from 2.5, kernels no longer have an init_tasks structure * so we need some other way of telling a new secondary core @@ -71,7 +68,7 @@ enum ipi_msg_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CPU_STOP, - IPI_CPU_CRASH_STOP, + IPI_CPU_STOP_NMI, IPI_TIMER, IPI_IRQ_WORK, NR_IPI, @@ -88,6 +85,8 @@ static int ipi_irq_base __ro_after_init; static int nr_ipi __ro_after_init = NR_IPI; static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init; +static bool crash_stop; + static void ipi_setup(int cpu); #ifdef CONFIG_HOTPLUG_CPU @@ -132,7 +131,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) /* Now bring the CPU into our world */ ret = boot_secondary(cpu, idle); if (ret) { - pr_err("CPU%u: failed to boot: %d\n", cpu, ret); + if (ret != -EPERM) + pr_err("CPU%u: failed to boot: %d\n", cpu, ret); return ret; } @@ -264,6 +264,13 @@ asmlinkage notrace void secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); + /* + * Secondary CPUs enter the kernel with all DAIF exceptions masked. + * + * As with setup_arch() we must unmask Debug and SError exceptions, and + * as the root irqchip has already been detected and initialized we can + * unmask IRQ and FIQ at the same time. + */ local_daif_restore(DAIF_PROCCTX); /* @@ -462,6 +469,8 @@ void __init smp_prepare_boot_cpu(void) init_gic_priority_masking(); kasan_init_hw_tags(); + /* Init percpu seeds for random tags after cpus are set up. */ + kasan_init_sw_tags(); } /* @@ -503,6 +512,59 @@ static int __init smp_cpu_setup(int cpu) static bool bootcpu_valid __initdata; static unsigned int cpu_count = 1; +int arch_register_cpu(int cpu) +{ + acpi_handle acpi_handle = acpi_get_processor_handle(cpu); + struct cpu *c = &per_cpu(cpu_devices, cpu); + + if (!acpi_disabled && !acpi_handle && + IS_ENABLED(CONFIG_ACPI_HOTPLUG_CPU)) + return -EPROBE_DEFER; + +#ifdef CONFIG_ACPI_HOTPLUG_CPU + /* For now block anything that looks like physical CPU Hotplug */ + if (invalid_logical_cpuid(cpu) || !cpu_present(cpu)) { + pr_err_once("Changing CPU present bit is not supported\n"); + return -ENODEV; + } +#endif + + /* + * Availability of the acpi handle is sufficient to establish + * that _STA has aleady been checked. No need to recheck here. + */ + c->hotpluggable = arch_cpu_is_hotpluggable(cpu); + + return register_cpu(c, cpu); +} + +#ifdef CONFIG_ACPI_HOTPLUG_CPU +void arch_unregister_cpu(int cpu) +{ + acpi_handle acpi_handle = acpi_get_processor_handle(cpu); + struct cpu *c = &per_cpu(cpu_devices, cpu); + acpi_status status; + unsigned long long sta; + + if (!acpi_handle) { + pr_err_once("Removing a CPU without associated ACPI handle\n"); + return; + } + + status = acpi_evaluate_integer(acpi_handle, "_STA", NULL, &sta); + if (ACPI_FAILURE(status)) + return; + + /* For now do not allow anything that looks like physical CPU HP */ + if (cpu_present(cpu) && !(sta & ACPI_STA_DEVICE_PRESENT)) { + pr_err_once("Changing CPU present bit is not supported\n"); + return; + } + + unregister_cpu(c); +} +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + #ifdef CONFIG_ACPI static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS]; @@ -523,7 +585,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) { u64 hwid = processor->arm_mpidr; - if (!acpi_gicc_is_usable(processor)) { + if (!(processor->flags & + (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) { pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid); return; } @@ -742,8 +805,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) */ for_each_possible_cpu(cpu) { - per_cpu(cpu_number, cpu) = cpu; - if (cpu == smp_processor_id()) continue; @@ -760,13 +821,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } } -static const char *ipi_types[NR_IPI] __tracepoint_string = { +static const char *ipi_types[MAX_IPI] __tracepoint_string = { [IPI_RESCHEDULE] = "Rescheduling interrupts", [IPI_CALL_FUNC] = "Function call interrupts", [IPI_CPU_STOP] = "CPU stop interrupts", - [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", + [IPI_CPU_STOP_NMI] = "CPU stop NMIs", [IPI_TIMER] = "Timer broadcast interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", + [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts", + [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts", }; static void smp_cross_call(const struct cpumask *target, unsigned int ipinr); @@ -777,7 +840,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) { unsigned int cpu, i; - for (i = 0; i < NR_IPI; i++) { + for (i = 0; i < MAX_IPI; i++) { seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) @@ -806,9 +869,9 @@ void arch_irq_work_raise(void) } #endif -static void __noreturn local_cpu_stop(void) +static void __noreturn local_cpu_stop(unsigned int cpu) { - set_cpu_online(smp_processor_id(), false); + set_cpu_online(cpu, false); local_daif_mask(); sdei_mask_local_cpu(); @@ -822,21 +885,26 @@ static void __noreturn local_cpu_stop(void) */ void __noreturn panic_smp_self_stop(void) { - local_cpu_stop(); + local_cpu_stop(smp_processor_id()); } -#ifdef CONFIG_KEXEC_CORE -static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0); -#endif - static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) { #ifdef CONFIG_KEXEC_CORE + /* + * Use local_daif_mask() instead of local_irq_disable() to make sure + * that pseudo-NMIs are disabled. The "crash stop" code starts with + * an IRQ and falls back to NMI (which might be pseudo). If the IRQ + * finally goes through right as we're timing out then the NMI could + * interrupt us. It's better to prevent the NMI and let the IRQ + * finish since the pt_regs will be better. + */ + local_daif_mask(); + crash_save_cpu(regs, cpu); - atomic_dec(&waiting_for_crash_ipi); + set_cpu_online(cpu, false); - local_irq_disable(); sdei_mask_local_cpu(); if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) @@ -901,14 +969,12 @@ static void do_handle_IPI(int ipinr) break; case IPI_CPU_STOP: - local_cpu_stop(); - break; - - case IPI_CPU_CRASH_STOP: - if (IS_ENABLED(CONFIG_KEXEC_CORE)) { + case IPI_CPU_STOP_NMI: + if (IS_ENABLED(CONFIG_KEXEC_CORE) && crash_stop) { ipi_cpu_crash_stop(cpu, get_irq_regs()); - unreachable(); + } else { + local_cpu_stop(cpu); } break; @@ -963,8 +1029,7 @@ static bool ipi_should_be_nmi(enum ipi_msg_type ipi) return false; switch (ipi) { - case IPI_CPU_STOP: - case IPI_CPU_CRASH_STOP: + case IPI_CPU_STOP_NMI: case IPI_CPU_BACKTRACE: case IPI_KGDB_ROUNDUP: return true; @@ -1021,12 +1086,12 @@ void __init set_smp_ipi_range(int ipi_base, int n) if (ipi_should_be_nmi(i)) { err = request_percpu_nmi(ipi_base + i, ipi_handler, - "IPI", &cpu_number); + "IPI", &irq_stat); WARN(err, "Could not request IPI %d as NMI, err=%d\n", i, err); } else { err = request_percpu_irq(ipi_base + i, ipi_handler, - "IPI", &cpu_number); + "IPI", &irq_stat); WARN(err, "Could not request IPI %d as IRQ, err=%d\n", i, err); } @@ -1077,79 +1142,109 @@ static inline unsigned int num_other_online_cpus(void) void smp_send_stop(void) { + static unsigned long stop_in_progress; + cpumask_t mask; unsigned long timeout; - if (num_other_online_cpus()) { - cpumask_t mask; + /* + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. + */ + if (num_other_online_cpus() == 0) + goto skip_ipi; - cpumask_copy(&mask, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), &mask); + /* Only proceed if this is the first CPU to reach this code */ + if (test_and_set_bit(0, &stop_in_progress)) + return; - if (system_state <= SYSTEM_RUNNING) - pr_crit("SMP: stopping secondary CPUs\n"); - smp_cross_call(&mask, IPI_CPU_STOP); - } + /* + * Send an IPI to all currently online CPUs except the CPU running + * this code. + * + * NOTE: we don't do anything here to prevent other CPUs from coming + * online after we snapshot `cpu_online_mask`. Ideally, the calling code + * should do something to prevent other CPUs from coming up. This code + * can be called in the panic path and thus it doesn't seem wise to + * grab the CPU hotplug mutex ourselves. Worst case: + * - If a CPU comes online as we're running, we'll likely notice it + * during the 1 second wait below and then we'll catch it when we try + * with an NMI (assuming NMIs are enabled) since we re-snapshot the + * mask before sending an NMI. + * - If we leave the function and see that CPUs are still online we'll + * at least print a warning. Especially without NMIs this function + * isn't foolproof anyway so calling code will just have to accept + * the fact that there could be cases where a CPU can't be stopped. + */ + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + + if (system_state <= SYSTEM_RUNNING) + pr_crit("SMP: stopping secondary CPUs\n"); - /* Wait up to one second for other CPUs to stop */ + /* + * Start with a normal IPI and wait up to one second for other CPUs to + * stop. We do this first because it gives other processors a chance + * to exit critical sections / drop locks and makes the rest of the + * stop process (especially console flush) more robust. + */ + smp_cross_call(&mask, IPI_CPU_STOP); timeout = USEC_PER_SEC; while (num_other_online_cpus() && timeout--) udelay(1); - if (num_other_online_cpus()) + /* + * If CPUs are still online, try an NMI. There's no excuse for this to + * be slow, so we only give them an extra 10 ms to respond. + */ + if (num_other_online_cpus() && ipi_should_be_nmi(IPI_CPU_STOP_NMI)) { + smp_rmb(); + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + + pr_info("SMP: retry stop with NMI for CPUs %*pbl\n", + cpumask_pr_args(&mask)); + + smp_cross_call(&mask, IPI_CPU_STOP_NMI); + timeout = USEC_PER_MSEC * 10; + while (num_other_online_cpus() && timeout--) + udelay(1); + } + + if (num_other_online_cpus()) { + smp_rmb(); + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", - cpumask_pr_args(cpu_online_mask)); + cpumask_pr_args(&mask)); + } +skip_ipi: sdei_mask_local_cpu(); } #ifdef CONFIG_KEXEC_CORE void crash_smp_send_stop(void) { - static int cpus_stopped; - cpumask_t mask; - unsigned long timeout; - /* * This function can be called twice in panic path, but obviously * we execute this only once. + * + * We use this same boolean to tell whether the IPI we send was a + * stop or a "crash stop". */ - if (cpus_stopped) + if (crash_stop) return; + crash_stop = 1; - cpus_stopped = 1; - - /* - * If this cpu is the only one alive at this point in time, online or - * not, there are no stop messages to be sent around, so just back out. - */ - if (num_other_online_cpus() == 0) - goto skip_ipi; - - cpumask_copy(&mask, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), &mask); - - atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); + smp_send_stop(); - pr_crit("SMP: stopping secondary CPUs\n"); - smp_cross_call(&mask, IPI_CPU_CRASH_STOP); - - /* Wait up to one second for other CPUs to stop */ - timeout = USEC_PER_SEC; - while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--) - udelay(1); - - if (atomic_read(&waiting_for_crash_ipi) > 0) - pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", - cpumask_pr_args(&mask)); - -skip_ipi: - sdei_mask_local_cpu(); sdei_handler_abort(); } bool smp_crash_stop_failed(void) { - return (atomic_read(&waiting_for_crash_ipi) > 0); + return num_other_online_cpus() != 0; } #endif diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 684c26511696..1d9d51d7627f 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -20,11 +20,28 @@ #include <asm/stack_pointer.h> #include <asm/stacktrace.h> +enum kunwind_source { + KUNWIND_SOURCE_UNKNOWN, + KUNWIND_SOURCE_FRAME, + KUNWIND_SOURCE_CALLER, + KUNWIND_SOURCE_TASK, + KUNWIND_SOURCE_REGS_PC, +}; + +union unwind_flags { + unsigned long all; + struct { + unsigned long fgraph : 1, + kretprobe : 1; + }; +}; + /* * Kernel unwind state * * @common: Common unwind state. * @task: The task being unwound. + * @graph_idx: Used by ftrace_graph_ret_addr() for optimized stack unwinding. * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance * associated with the most recently encountered replacement lr * value. @@ -32,9 +49,13 @@ struct kunwind_state { struct unwind_state common; struct task_struct *task; + int graph_idx; #ifdef CONFIG_KRETPROBES struct llist_node *kr_cur; #endif + enum kunwind_source source; + union unwind_flags flags; + struct pt_regs *regs; }; static __always_inline void @@ -43,6 +64,9 @@ kunwind_init(struct kunwind_state *state, { unwind_init_common(&state->common); state->task = task; + state->source = KUNWIND_SOURCE_UNKNOWN; + state->flags.all = 0; + state->regs = NULL; } /* @@ -58,8 +82,10 @@ kunwind_init_from_regs(struct kunwind_state *state, { kunwind_init(state, current); + state->regs = regs; state->common.fp = regs->regs[29]; state->common.pc = regs->pc; + state->source = KUNWIND_SOURCE_REGS_PC; } /* @@ -77,6 +103,7 @@ kunwind_init_from_caller(struct kunwind_state *state) state->common.fp = (unsigned long)__builtin_frame_address(1); state->common.pc = (unsigned long)__builtin_return_address(0); + state->source = KUNWIND_SOURCE_CALLER; } /* @@ -97,6 +124,7 @@ kunwind_init_from_task(struct kunwind_state *state, state->common.fp = thread_saved_fp(task); state->common.pc = thread_saved_pc(task); + state->source = KUNWIND_SOURCE_TASK; } static __always_inline int @@ -106,12 +134,15 @@ kunwind_recover_return_address(struct kunwind_state *state) if (state->task->ret_stack && (state->common.pc == (unsigned long)return_to_handler)) { unsigned long orig_pc; - orig_pc = ftrace_graph_ret_addr(state->task, NULL, + orig_pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, state->common.pc, (void *)state->common.fp); - if (WARN_ON_ONCE(state->common.pc == orig_pc)) + if (state->common.pc == orig_pc) { + WARN_ON_ONCE(state->task == current); return -EINVAL; + } state->common.pc = orig_pc; + state->flags.fgraph = 1; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ @@ -122,12 +153,95 @@ kunwind_recover_return_address(struct kunwind_state *state) (void *)state->common.fp, &state->kr_cur); state->common.pc = orig_pc; + state->flags.kretprobe = 1; } #endif /* CONFIG_KRETPROBES */ return 0; } +static __always_inline +int kunwind_next_regs_pc(struct kunwind_state *state) +{ + struct stack_info *info; + unsigned long fp = state->common.fp; + struct pt_regs *regs; + + regs = container_of((u64 *)fp, struct pt_regs, stackframe.record.fp); + + info = unwind_find_stack(&state->common, (unsigned long)regs, sizeof(*regs)); + if (!info) + return -EINVAL; + + unwind_consume_stack(&state->common, info, (unsigned long)regs, + sizeof(*regs)); + + state->regs = regs; + state->common.pc = regs->pc; + state->common.fp = regs->regs[29]; + state->regs = NULL; + state->source = KUNWIND_SOURCE_REGS_PC; + return 0; +} + +static __always_inline int +kunwind_next_frame_record_meta(struct kunwind_state *state) +{ + struct task_struct *tsk = state->task; + unsigned long fp = state->common.fp; + struct frame_record_meta *meta; + struct stack_info *info; + + info = unwind_find_stack(&state->common, fp, sizeof(*meta)); + if (!info) + return -EINVAL; + + meta = (struct frame_record_meta *)fp; + switch (READ_ONCE(meta->type)) { + case FRAME_META_TYPE_FINAL: + if (meta == &task_pt_regs(tsk)->stackframe) + return -ENOENT; + WARN_ON_ONCE(tsk == current); + return -EINVAL; + case FRAME_META_TYPE_PT_REGS: + return kunwind_next_regs_pc(state); + default: + WARN_ON_ONCE(tsk == current); + return -EINVAL; + } +} + +static __always_inline int +kunwind_next_frame_record(struct kunwind_state *state) +{ + unsigned long fp = state->common.fp; + struct frame_record *record; + struct stack_info *info; + unsigned long new_fp, new_pc; + + if (fp & 0x7) + return -EINVAL; + + info = unwind_find_stack(&state->common, fp, sizeof(*record)); + if (!info) + return -EINVAL; + + record = (struct frame_record *)fp; + new_fp = READ_ONCE(record->fp); + new_pc = READ_ONCE(record->lr); + + if (!new_fp && !new_pc) + return kunwind_next_frame_record_meta(state); + + unwind_consume_stack(&state->common, info, fp, sizeof(*record)); + + state->common.fp = new_fp; + state->common.pc = new_pc; + state->source = KUNWIND_SOURCE_FRAME; + + return 0; +} + /* * Unwind from one frame record (A) to the next frame record (B). * @@ -138,15 +252,21 @@ kunwind_recover_return_address(struct kunwind_state *state) static __always_inline int kunwind_next(struct kunwind_state *state) { - struct task_struct *tsk = state->task; - unsigned long fp = state->common.fp; int err; - /* Final frame; nothing to unwind */ - if (fp == (unsigned long)task_pt_regs(tsk)->stackframe) - return -ENOENT; + state->flags.all = 0; + + switch (state->source) { + case KUNWIND_SOURCE_FRAME: + case KUNWIND_SOURCE_CALLER: + case KUNWIND_SOURCE_TASK: + case KUNWIND_SOURCE_REGS_PC: + err = kunwind_next_frame_record(state); + break; + default: + err = -EINVAL; + } - err = unwind_next_frame_record(&state->common); if (err) return err; @@ -292,10 +412,32 @@ noinline noinstr void arch_bpf_stack_walk(bool (*consume_entry)(void *cookie, u6 kunwind_stack_walk(arch_bpf_unwind_consume_entry, &data, current, NULL); } -static bool dump_backtrace_entry(void *arg, unsigned long where) +static const char *state_source_string(const struct kunwind_state *state) +{ + switch (state->source) { + case KUNWIND_SOURCE_FRAME: return NULL; + case KUNWIND_SOURCE_CALLER: return "C"; + case KUNWIND_SOURCE_TASK: return "T"; + case KUNWIND_SOURCE_REGS_PC: return "P"; + default: return "U"; + } +} + +static bool dump_backtrace_entry(const struct kunwind_state *state, void *arg) { + const char *source = state_source_string(state); + union unwind_flags flags = state->flags; + bool has_info = source || flags.all; char *loglvl = arg; - printk("%s %pSb\n", loglvl, (void *)where); + + printk("%s %pSb%s%s%s%s%s\n", loglvl, + (void *)state->common.pc, + has_info ? " (" : "", + source ? source : "", + flags.fgraph ? "F" : "", + flags.kretprobe ? "K" : "", + has_info ? ")" : ""); + return true; } @@ -314,7 +456,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, return; printk("%sCall trace:\n", loglvl); - arch_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs); + kunwind_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs); put_task_stack(tsk); } @@ -324,3 +466,123 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) dump_backtrace(NULL, tsk, loglvl); barrier(); } + +/* + * The struct defined for userspace stack frame in AARCH64 mode. + */ +struct frame_tail { + struct frame_tail __user *fp; + unsigned long lr; +} __attribute__((packed)); + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static struct frame_tail __user * +unwind_user_frame(struct frame_tail __user *tail, void *cookie, + stack_trace_consume_fn consume_entry) +{ + struct frame_tail buftail; + unsigned long err; + unsigned long lr; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + lr = ptrauth_strip_user_insn_pac(buftail.lr); + + if (!consume_entry(cookie, lr)) + return NULL; + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail >= buftail.fp) + return NULL; + + return buftail.fp; +} + +#ifdef CONFIG_COMPAT +/* + * The registers we're interested in are at the end of the variable + * length saved register structure. The fp points at the end of this + * structure so the address of this struct is: + * (struct compat_frame_tail *)(xxx->fp)-1 + * + * This code has been adapted from the ARM OProfile support. + */ +struct compat_frame_tail { + compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ + u32 sp; + u32 lr; +} __attribute__((packed)); + +static struct compat_frame_tail __user * +unwind_compat_user_frame(struct compat_frame_tail __user *tail, void *cookie, + stack_trace_consume_fn consume_entry) +{ + struct compat_frame_tail buftail; + unsigned long err; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + if (!consume_entry(cookie, buftail.lr)) + return NULL; + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail + 1 >= (struct compat_frame_tail __user *) + compat_ptr(buftail.fp)) + return NULL; + + return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; +} +#endif /* CONFIG_COMPAT */ + + +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, + const struct pt_regs *regs) +{ + if (!consume_entry(cookie, regs->pc)) + return; + + if (!compat_user_mode(regs)) { + /* AARCH64 mode */ + struct frame_tail __user *tail; + + tail = (struct frame_tail __user *)regs->regs[29]; + while (tail && !((unsigned long)tail & 0x7)) + tail = unwind_user_frame(tail, cookie, consume_entry); + } else { +#ifdef CONFIG_COMPAT + /* AARCH32 compat mode */ + struct compat_frame_tail __user *tail; + + tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; + while (tail && !((unsigned long)tail & 0x3)) + tail = unwind_compat_user_frame(tail, cookie, consume_entry); +#endif + } +} diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index d5ffaaab31a7..f08408b6e826 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -48,14 +48,16 @@ asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused) */ #define __arm64_sys_personality __arm64_sys_arm64_personality +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) + #undef __SYSCALL #define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *); -#include <asm/unistd.h> +#include <asm/syscall_table_64.h> #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = __arm64_##sym, const syscall_fn_t sys_call_table[__NR_syscalls] = { [0 ... __NR_syscalls - 1] = __arm64_sys_ni_syscall, -#include <asm/unistd.h> +#include <asm/syscall_table_64.h> }; diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c index fc40386afb1b..96bcfb907443 100644 --- a/arch/arm64/kernel/sys32.c +++ b/arch/arm64/kernel/sys32.c @@ -5,17 +5,12 @@ * Copyright (C) 2015 ARM Ltd. */ -/* - * Needed to avoid conflicting __NR_* macros between uapi/asm/unistd.h and - * asm/unistd32.h. - */ -#define __COMPAT_SYSCALL_NR - #include <linux/compat.h> #include <linux/compiler.h> #include <linux/syscalls.h> #include <asm/syscall.h> +#include <asm/unistd_compat_32.h> asmlinkage long compat_sys_sigreturn(void); asmlinkage long compat_sys_rt_sigreturn(void); @@ -122,14 +117,16 @@ COMPAT_SYSCALL_DEFINE6(aarch32_fallocate, int, fd, int, mode, return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len)); } +#define __SYSCALL_WITH_COMPAT(nr, sym, compat) __SYSCALL(nr, compat) + #undef __SYSCALL #define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *); -#include <asm/unistd32.h> +#include <asm/syscall_table_32.h> #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = __arm64_##sym, -const syscall_fn_t compat_sys_call_table[__NR_compat_syscalls] = { - [0 ... __NR_compat_syscalls - 1] = __arm64_sys_ni_syscall, -#include <asm/unistd32.h> +const syscall_fn_t compat_sys_call_table[__NR_compat32_syscalls] = { + [0 ... __NR_compat32_syscalls - 1] = __arm64_sys_ni_syscall, +#include <asm/syscall_table_32.h> }; diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index ad198262b981..c442fcec6b9e 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -14,6 +14,7 @@ #include <asm/syscall.h> #include <asm/thread_info.h> #include <asm/unistd.h> +#include <asm/unistd_compat_32.h> long compat_arm_syscall(struct pt_regs *regs, int scno); long sys_ni_syscall(void); @@ -53,17 +54,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, syscall_set_return_value(current, regs, 0, ret); /* - * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), - * but not enough for arm64 stack utilization comfort. To keep - * reasonable stack head room, reduce the maximum offset to 9 bits. + * This value will get limited by KSTACK_OFFSET_MAX(), which is 10 + * bits. The actual entropy will be further reduced by the compiler + * when applying stack alignment constraints: the AAPCS mandates a + * 16-byte aligned SP at function boundaries, which will remove the + * 4 low bits from any entropy chosen here. * - * The actual entropy will be further reduced by the compiler when - * applying stack alignment constraints: the AAPCS mandates a - * 16-byte (i.e. 4-bit) aligned SP at function boundaries. - * - * The resulting 5 bits of entropy is seen in SP[8:4]. + * The resulting 6 bits of entropy is seen in SP[9:4]. */ - choose_random_kstack_offset(get_random_u16() & 0x1FF); + choose_random_kstack_offset(get_random_u16()); } static inline bool has_syscall_work(unsigned long flags) @@ -155,7 +154,7 @@ void do_el0_svc(struct pt_regs *regs) #ifdef CONFIG_COMPAT void do_el0_svc_compat(struct pt_regs *regs) { - el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls, + el0_svc_common(regs, regs->regs[7], __NR_compat32_syscalls, compat_sys_call_table); } #endif diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 1a2c72f3e7f8..cb180684d10d 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -194,12 +194,19 @@ static void amu_fie_setup(const struct cpumask *cpus) int cpu; /* We are already set since the last insmod of cpufreq driver */ - if (unlikely(cpumask_subset(cpus, amu_fie_cpus))) + if (cpumask_available(amu_fie_cpus) && + unlikely(cpumask_subset(cpus, amu_fie_cpus))) return; - for_each_cpu(cpu, cpus) { + for_each_cpu(cpu, cpus) if (!freq_counters_valid(cpu)) return; + + if (!cpumask_available(amu_fie_cpus) && + !zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) { + WARN_ONCE(1, "Failed to allocate FIE cpumask for CPUs[%*pbl]\n", + cpumask_pr_args(cpus)); + return; } cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus); @@ -237,17 +244,8 @@ static struct notifier_block init_amu_fie_notifier = { static int __init init_amu_fie(void) { - int ret; - - if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) - return -ENOMEM; - - ret = cpufreq_register_notifier(&init_amu_fie_notifier, + return cpufreq_register_notifier(&init_amu_fie_notifier, CPUFREQ_POLICY_NOTIFIER); - if (ret) - free_cpumask_var(amu_fie_cpus); - - return ret; } core_initcall(init_amu_fie); diff --git a/arch/arm64/kernel/trace-events-emulation.h b/arch/arm64/kernel/trace-events-emulation.h index 6c40f58b844a..c51b547b583e 100644 --- a/arch/arm64/kernel/trace-events-emulation.h +++ b/arch/arm64/kernel/trace-events-emulation.h @@ -18,7 +18,7 @@ TRACE_EVENT(instruction_emulation, ), TP_fast_assign( - __assign_str(instr, instr); + __assign_str(instr); __entry->addr = addr; ), diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 215e6d7f2df8..4e26bd356a48 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -41,7 +41,7 @@ #include <asm/extable.h> #include <asm/insn.h> #include <asm/kprobes.h> -#include <asm/patching.h> +#include <asm/text-patching.h> #include <asm/traps.h> #include <asm/smp.h> #include <asm/stack_pointer.h> @@ -273,6 +273,12 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far, force_sig_fault(signo, code, (void __user *)far); } +void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey) +{ + arm64_show_signal(SIGSEGV, str); + force_sig_pkuerr((void __user *)far, pkey); +} + void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str) { @@ -500,6 +506,16 @@ void do_el1_bti(struct pt_regs *regs, unsigned long esr) die("Oops - BTI", regs, esr); } +void do_el0_gcs(struct pt_regs *regs, unsigned long esr) +{ + force_signal_inject(SIGSEGV, SEGV_CPERR, regs->pc, 0); +} + +void do_el1_gcs(struct pt_regs *regs, unsigned long esr) +{ + die("Oops - GCS", regs, esr); +} + void do_el0_fpac(struct pt_regs *regs, unsigned long esr) { force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr); @@ -525,6 +541,13 @@ void do_el0_mops(struct pt_regs *regs, unsigned long esr) user_fastforward_single_step(current); } +void do_el1_mops(struct pt_regs *regs, unsigned long esr) +{ + arm64_mops_reset_regs(®s->user_regs, esr); + + kernel_fastforward_single_step(regs); +} + #define __user_cache_maint(insn, address, res) \ if (address >= TASK_SIZE_MAX) { \ res = -EFAULT; \ @@ -601,18 +624,26 @@ static void ctr_read_handler(unsigned long esr, struct pt_regs *regs) static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs) { - int rt = ESR_ELx_SYS64_ISS_RT(esr); + if (test_thread_flag(TIF_TSC_SIGSEGV)) { + force_sig(SIGSEGV); + } else { + int rt = ESR_ELx_SYS64_ISS_RT(esr); - pt_regs_write_reg(regs, rt, arch_timer_read_counter()); - arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); + pt_regs_write_reg(regs, rt, arch_timer_read_counter()); + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); + } } static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs) { - int rt = ESR_ELx_SYS64_ISS_RT(esr); + if (test_thread_flag(TIF_TSC_SIGSEGV)) { + force_sig(SIGSEGV); + } else { + int rt = ESR_ELx_SYS64_ISS_RT(esr); - pt_regs_write_reg(regs, rt, arch_timer_get_rate()); - arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); + pt_regs_write_reg(regs, rt, arch_timer_get_rate()); + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); + } } static void mrs_handler(unsigned long esr, struct pt_regs *regs) @@ -838,6 +869,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_MOPS] = "MOPS", [ESR_ELx_EC_FP_EXC32] = "FP (AArch32)", [ESR_ELx_EC_FP_EXC64] = "FP (AArch64)", + [ESR_ELx_EC_GCS] = "Guarded Control Stack", [ESR_ELx_EC_SERROR] = "SError", [ESR_ELx_EC_BREAKPT_LOW] = "Breakpoint (lower EL)", [ESR_ELx_EC_BREAKPT_CUR] = "Breakpoint (current EL)", @@ -1105,8 +1137,6 @@ static struct break_hook ubsan_break_hook = { }; #endif -#define esr_comment(esr) ((esr) & ESR_ELx_BRK64_ISS_COMMENT_MASK) - /* * Initial handler for AArch64 BRK exceptions * This handler only used until debug_traps_init(). @@ -1115,15 +1145,15 @@ int __init early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs) { #ifdef CONFIG_CFI_CLANG - if ((esr_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE) + if (esr_is_cfi_brk(esr)) return cfi_handler(regs, esr) != DBG_HOOK_HANDLED; #endif #ifdef CONFIG_KASAN_SW_TAGS - if ((esr_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) + if ((esr_brk_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) return kasan_handler(regs, esr) != DBG_HOOK_HANDLED; #endif #ifdef CONFIG_UBSAN_TRAP - if ((esr_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM) + if ((esr_brk_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM) return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED; #endif return bug_handler(regs, esr) != DBG_HOOK_HANDLED; diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 89b6e7840002..e8ed8e5b713b 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -19,7 +19,6 @@ #include <linux/signal.h> #include <linux/slab.h> #include <linux/time_namespace.h> -#include <linux/timekeeper_internal.h> #include <linux/vmalloc.h> #include <vdso/datapage.h> #include <vdso/helpers.h> @@ -34,19 +33,11 @@ enum vdso_abi { VDSO_ABI_AA32, }; -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES, -}; - struct vdso_abi_info { const char *name; const char *vdso_code_start; const char *vdso_code_end; unsigned long vdso_pages; - /* Data Mapping */ - struct vm_special_mapping *dm; /* Code Mapping */ struct vm_special_mapping *cm; }; @@ -119,6 +110,8 @@ struct vdso_data *arch_get_vdso_data(void *vvar_page) return (struct vdso_data *)(vvar_page); } +static const struct vm_special_mapping vvar_map; + /* * The vvar mapping contains data for a specific time namespace, so when a task * changes namespace we must unmap its vvar data for the old namespace. @@ -135,12 +128,8 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) mmap_read_lock(mm); for_each_vma(vmi, vma) { - if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm)) + if (vma_is_special_mapping(vma, &vvar_map)) zap_vma_pages(vma); -#ifdef CONFIG_COMPAT_VDSO - if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm)) - zap_vma_pages(vma); -#endif } mmap_read_unlock(mm); @@ -182,6 +171,11 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, return vmf_insert_pfn(vma, vmf->address, pfn); } +static const struct vm_special_mapping vvar_map = { + .name = "[vvar]", + .fault = vvar_fault, +}; + static int __setup_additional_pages(enum vdso_abi abi, struct mm_struct *mm, struct linux_binprm *bprm, @@ -205,7 +199,7 @@ static int __setup_additional_pages(enum vdso_abi abi, ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE, VM_READ|VM_MAYREAD|VM_PFNMAP, - vdso_info[abi].dm); + &vvar_map); if (IS_ERR(ret)) goto up_fail; @@ -235,7 +229,6 @@ up_fail: enum aarch32_map { AA32_MAP_VECTORS, /* kuser helpers */ AA32_MAP_SIGPAGE, - AA32_MAP_VVAR, AA32_MAP_VDSO, }; @@ -260,10 +253,6 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { .pages = &aarch32_sig_page, .mremap = aarch32_sigpage_mremap, }, - [AA32_MAP_VVAR] = { - .name = "[vvar]", - .fault = vvar_fault, - }, [AA32_MAP_VDSO] = { .name = "[vdso]", .mremap = vdso_mremap, @@ -313,7 +302,6 @@ static int __init __aarch32_alloc_vdso_pages(void) if (!IS_ENABLED(CONFIG_COMPAT_VDSO)) return 0; - vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR]; vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO]; return __vdso_init(VDSO_ABI_AA32); @@ -408,26 +396,14 @@ out: } #endif /* CONFIG_COMPAT */ -enum aarch64_map { - AA64_MAP_VVAR, - AA64_MAP_VDSO, -}; - -static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = { - [AA64_MAP_VVAR] = { - .name = "[vvar]", - .fault = vvar_fault, - }, - [AA64_MAP_VDSO] = { - .name = "[vdso]", - .mremap = vdso_mremap, - }, +static struct vm_special_mapping aarch64_vdso_map __ro_after_init = { + .name = "[vdso]", + .mremap = vdso_mremap, }; static int __init vdso_init(void) { - vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR]; - vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO]; + vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_map; return __vdso_init(VDSO_ABI_AA64); } diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 8818287f1095..35685c036044 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -9,7 +9,7 @@ # Include the generic Makefile to check the built vdso. include $(srctree)/lib/vdso/Makefile -obj-vdso := vgettimeofday.o note.o sigreturn.o +obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg @@ -21,7 +21,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # potential future proofing if we end up with internal calls to the exported # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). -ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ +ldflags-y := -shared -soname=linux-vdso.so.1 \ -Bsymbolic --build-id=sha1 -n $(btildflags-y) ifdef CONFIG_LD_ORPHAN_WARN @@ -34,26 +34,27 @@ ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO # -Wmissing-prototypes and -Wmissing-declarations are removed from -# the CFLAGS of vgettimeofday.c to make possible to build the -# kernel with CONFIG_WERROR enabled. -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ - $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ - $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ - -Wmissing-prototypes -Wmissing-declarations -KASAN_SANITIZE := n -KCSAN_SANITIZE := n -UBSAN_SANITIZE := n -OBJECT_FILES_NON_STANDARD := y -KCOV_INSTRUMENT := n - -CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables +# the CFLAGS to make possible to build the kernel with CONFIG_WERROR enabled. +CC_FLAGS_REMOVE_VDSO := $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ + $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ + $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ + -Wmissing-prototypes -Wmissing-declarations + +CC_FLAGS_ADD_VDSO := -O2 -mcmodel=tiny -fasynchronous-unwind-tables + +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_REMOVE_VDSO) +CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_REMOVE_VDSO) + +CFLAGS_vgettimeofday.o = $(CC_FLAGS_ADD_VDSO) +CFLAGS_vgetrandom.o = $(CC_FLAGS_ADD_VDSO) ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) endif -# Disable gcov profiling for VDSO code -GCOV_PROFILE := n +ifneq ($(c-getrandom-y),) + CFLAGS_vgetrandom.o += -include $(c-getrandom-y) +endif targets += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) @@ -68,7 +69,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # Generate VDSO offsets using helper script -gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +gen-vdsosym := $(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 45354f2ddf70..47ad6944f9f0 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -11,7 +11,9 @@ #include <linux/const.h> #include <asm/page.h> #include <asm/vdso.h> +#include <asm/vdso/vsyscall.h> #include <asm-generic/vmlinux.lds.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") OUTPUT_ARCH(aarch64) @@ -19,10 +21,11 @@ OUTPUT_ARCH(aarch64) SECTIONS { PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); + PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); #ifdef CONFIG_TIME_NS PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); #endif - . = VDSO_LBASE + SIZEOF_HEADERS; + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } @@ -38,6 +41,7 @@ SECTIONS */ /DISCARD/ : { *(.note.GNU-stack .note.gnu.property) + *(.ARM.attributes) } .note : { *(.note.*) } :text :note @@ -102,6 +106,7 @@ VERSION __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; + __kernel_getrandom; local: *; }; } diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S new file mode 100644 index 000000000000..67890b445309 --- /dev/null +++ b/arch/arm64/kernel/vdso/vgetrandom-chacha.S @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/linkage.h> +#include <asm/cache.h> +#include <asm/assembler.h> + + .text + +#define state0 v0 +#define state1 v1 +#define state2 v2 +#define state3 v3 +#define copy0 v4 +#define copy0_q q4 +#define copy1 v5 +#define copy2 v6 +#define copy3 v7 +#define copy3_d d7 +#define one_d d16 +#define one_q q16 +#define one_v v16 +#define tmp v17 +#define rot8 v18 + +/* + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive + * number of blocks of output with nonce 0, taking an input key and 8-bytes + * counter. Importantly does not spill to the stack. + * + * This implementation avoids d8-d15 because they are callee-save in user + * space. + * + * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, + * const uint8_t *key, + * uint32_t *counter, + * size_t nblocks) + * + * x0: output bytes + * x1: 32-byte key input + * x2: 8-byte counter input/output + * x3: number of 64-byte block to write to output + */ +SYM_FUNC_START(__arch_chacha20_blocks_nostack) + + /* copy0 = "expand 32-byte k" */ + mov_q x8, 0x3320646e61707865 + mov_q x9, 0x6b20657479622d32 + mov copy0.d[0], x8 + mov copy0.d[1], x9 + + /* copy1,copy2 = key */ + ld1 { copy1.4s, copy2.4s }, [x1] + /* copy3 = counter || zero nonce */ + ld1 { copy3.2s }, [x2] + + movi one_v.2s, #1 + uzp1 one_v.4s, one_v.4s, one_v.4s + +.Lblock: + /* copy state to auxiliary vectors for the final add after the permute. */ + mov state0.16b, copy0.16b + mov state1.16b, copy1.16b + mov state2.16b, copy2.16b + mov state3.16b, copy3.16b + + mov w4, 20 +.Lpermute: + /* + * Permute one 64-byte block where the state matrix is stored in the four NEON + * registers state0-state3. It performs matrix operations on four words in parallel, + * but requires shuffling to rearrange the words after each round. + */ + +.Ldoubleround: + /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */ + add state0.4s, state0.4s, state1.4s + eor state3.16b, state3.16b, state0.16b + rev32 state3.8h, state3.8h + + /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */ + add state2.4s, state2.4s, state3.4s + eor tmp.16b, state1.16b, state2.16b + shl state1.4s, tmp.4s, #12 + sri state1.4s, tmp.4s, #20 + + /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */ + add state0.4s, state0.4s, state1.4s + eor tmp.16b, state3.16b, state0.16b + shl state3.4s, tmp.4s, #8 + sri state3.4s, tmp.4s, #24 + + /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */ + add state2.4s, state2.4s, state3.4s + eor tmp.16b, state1.16b, state2.16b + shl state1.4s, tmp.4s, #7 + sri state1.4s, tmp.4s, #25 + + /* state1[0,1,2,3] = state1[1,2,3,0] */ + ext state1.16b, state1.16b, state1.16b, #4 + /* state2[0,1,2,3] = state2[2,3,0,1] */ + ext state2.16b, state2.16b, state2.16b, #8 + /* state3[0,1,2,3] = state3[1,2,3,0] */ + ext state3.16b, state3.16b, state3.16b, #12 + + /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */ + add state0.4s, state0.4s, state1.4s + eor state3.16b, state3.16b, state0.16b + rev32 state3.8h, state3.8h + + /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */ + add state2.4s, state2.4s, state3.4s + eor tmp.16b, state1.16b, state2.16b + shl state1.4s, tmp.4s, #12 + sri state1.4s, tmp.4s, #20 + + /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */ + add state0.4s, state0.4s, state1.4s + eor tmp.16b, state3.16b, state0.16b + shl state3.4s, tmp.4s, #8 + sri state3.4s, tmp.4s, #24 + + /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */ + add state2.4s, state2.4s, state3.4s + eor tmp.16b, state1.16b, state2.16b + shl state1.4s, tmp.4s, #7 + sri state1.4s, tmp.4s, #25 + + /* state1[0,1,2,3] = state1[3,0,1,2] */ + ext state1.16b, state1.16b, state1.16b, #12 + /* state2[0,1,2,3] = state2[2,3,0,1] */ + ext state2.16b, state2.16b, state2.16b, #8 + /* state3[0,1,2,3] = state3[1,2,3,0] */ + ext state3.16b, state3.16b, state3.16b, #4 + + subs w4, w4, #2 + b.ne .Ldoubleround + + /* output0 = state0 + state0 */ + add state0.4s, state0.4s, copy0.4s + /* output1 = state1 + state1 */ + add state1.4s, state1.4s, copy1.4s + /* output2 = state2 + state2 */ + add state2.4s, state2.4s, copy2.4s + /* output2 = state3 + state3 */ + add state3.4s, state3.4s, copy3.4s + st1 { state0.16b - state3.16b }, [x0] + + /* + * ++copy3.counter, the 'add' clears the upper half of the SIMD register + * which is the expected behaviour here. + */ + add copy3_d, copy3_d, one_d + + /* output += 64, --nblocks */ + add x0, x0, 64 + subs x3, x3, #1 + b.ne .Lblock + + /* counter = copy3.counter */ + st1 { copy3.2s }, [x2] + + /* Zero out the potentially sensitive regs, in case nothing uses these again. */ + movi state0.16b, #0 + movi state1.16b, #0 + movi state2.16b, #0 + movi state3.16b, #0 + movi copy1.16b, #0 + movi copy2.16b, #0 + ret +SYM_FUNC_END(__arch_chacha20_blocks_nostack) + +emit_aarch64_feature_1_and diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c new file mode 100644 index 000000000000..832fe195292b --- /dev/null +++ b/arch/arm64/kernel/vdso/vgetrandom.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <uapi/asm-generic/errno.h> + +typeof(__cvdso_getrandom) __kernel_getrandom; + +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) +{ + if (alternative_has_cap_likely(ARM64_HAS_FPSIMD)) + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); + + if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags)) + return -ENOSYS; + return getrandom_syscall(buffer, len, flags); +} diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index f5f80fdce0fe..25a2cb6317f3 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -98,7 +98,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__ # From arm vDSO Makefile VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 -VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1 +VDSO_LDFLAGS += -shared --build-id=sha1 VDSO_LDFLAGS += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) @@ -136,7 +136,7 @@ $(obj)/vdso32.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE $(call if_changed,vdsomunge) # Link rule for the .so file, .lds has to be first -$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE +$(obj)/vdso.so.raw: $(obj)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold_and_vdso_check) # Compilation rules for the vDSO sources diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S index 8d95d7d35057..732702a187e9 100644 --- a/arch/arm64/kernel/vdso32/vdso.lds.S +++ b/arch/arm64/kernel/vdso32/vdso.lds.S @@ -22,7 +22,7 @@ SECTIONS #ifdef CONFIG_TIME_NS PROVIDE_HIDDEN(_timens_data = _vdso_data + PAGE_SIZE); #endif - . = VDSO_LBASE + SIZEOF_HEADERS; + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 755a22d4f840..e73326bd3ff7 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -162,6 +162,7 @@ SECTIONS /DISCARD/ : { *(.interp .dynamic) *(.dynsym .dynstr .hash .gnu.hash) + *(.ARM.attributes) } . = KIMAGE_VADDR; @@ -264,6 +265,8 @@ SECTIONS EXIT_DATA } + RUNTIME_CONST_VARIABLES + PERCPU_SECTION(L1_CACHE_BYTES) HYPERVISOR_PERCPU_SECTION @@ -285,6 +288,9 @@ SECTIONS __initdata_end = .; __init_end = .; + .data.rel.ro : { *(.data.rel.ro) } + ASSERT(SIZEOF(.data.rel.ro) == 0, "Unexpected RELRO detected!") + _data = .; _sdata = .; RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) @@ -341,9 +347,6 @@ SECTIONS *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt) } ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!") - - .data.rel.ro : { *(.data.rel.ro) } - ASSERT(SIZEOF(.data.rel.ro) == 0, "Unexpected RELRO detected!") } #include "image-vars.h" |