diff options
Diffstat (limited to 'arch/arm64/kernel')
43 files changed, 2073 insertions, 1197 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index a561cbb91d4d..bbaf0bc4ad60 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -3,8 +3,6 @@ # Makefile for the linux kernel. # -CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) -AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_armv8_deprecated.o := -I$(src) CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) @@ -19,7 +17,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ - syscall.o + syscall.o proton-pack.o targets += efi-entry.o @@ -59,9 +57,9 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o -obj-$(CONFIG_ARM64_SSBD) += ssbd.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o +obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso/ probes/ obj-$(CONFIG_COMPAT_VDSO) += vdso32/ diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 455966401102..cada0b816c8a 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -298,8 +298,21 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: case EFI_PERSISTENT_MEMORY: - pr_warn(FW_BUG "requested region covers kernel memory @ %pa\n", &phys); - return NULL; + if (memblock_is_map_memory(phys) || + !memblock_is_region_memory(phys, size)) { + pr_warn(FW_BUG "requested region covers kernel memory @ %pa\n", &phys); + return NULL; + } + /* + * Mapping kernel memory is permitted if the region in + * question is covered by a single memblock with the + * NOMAP attribute set: this enables the use of ACPI + * table overrides passed via initramfs, which are + * reserved in memory using arch_reserve_mem_area() + * below. As this particular use case only requires + * read access, fall through to the R/O mapping case. + */ + fallthrough; case EFI_RUNTIME_SERVICES_CODE: /* @@ -322,7 +335,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) */ if (memblock_is_map_memory(phys)) return (void __iomem *)__phys_to_virt(phys); - /* fall through */ + fallthrough; default: if (region->attribute & EFI_MEMORY_WB) @@ -388,3 +401,8 @@ int apei_claim_sea(struct pt_regs *regs) return err; } + +void arch_reserve_mem_area(acpi_physical_address addr, size_t size) +{ + memblock_mark_nomap(addr, size); +} diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 4a18055b2ff9..37721eb6f9a1 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -35,6 +35,10 @@ SYM_CODE_START(__cpu_soft_restart) mov_q x13, SCTLR_ELx_FLAGS bic x12, x12, x13 pre_disable_mmu_workaround + /* + * either disable EL1&0 translation regime or disable EL2&0 translation + * regime if HCR_EL2.E2H == 1 + */ msr sctlr_el1, x12 isb diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 6bd1d3ad037a..24d75af344b1 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -106,365 +106,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap) sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } -atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); - -#include <asm/mmu_context.h> -#include <asm/cacheflush.h> - -DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); - -#ifdef CONFIG_KVM_INDIRECT_VECTORS -static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, - const char *hyp_vecs_end) -{ - void *dst = lm_alias(__bp_harden_hyp_vecs + slot * SZ_2K); - int i; - - for (i = 0; i < SZ_2K; i += 0x80) - memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); - - __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); -} - -static void install_bp_hardening_cb(bp_hardening_cb_t fn, - const char *hyp_vecs_start, - const char *hyp_vecs_end) -{ - static DEFINE_RAW_SPINLOCK(bp_lock); - int cpu, slot = -1; - - /* - * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if - * we're a guest. Skip the hyp-vectors work. - */ - if (!hyp_vecs_start) { - __this_cpu_write(bp_hardening_data.fn, fn); - return; - } - - raw_spin_lock(&bp_lock); - for_each_possible_cpu(cpu) { - if (per_cpu(bp_hardening_data.fn, cpu) == fn) { - slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); - break; - } - } - - if (slot == -1) { - slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); - __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); - } - - __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); - __this_cpu_write(bp_hardening_data.fn, fn); - raw_spin_unlock(&bp_lock); -} -#else -static void install_bp_hardening_cb(bp_hardening_cb_t fn, - const char *hyp_vecs_start, - const char *hyp_vecs_end) -{ - __this_cpu_write(bp_hardening_data.fn, fn); -} -#endif /* CONFIG_KVM_INDIRECT_VECTORS */ - -#include <linux/arm-smccc.h> - -static void __maybe_unused call_smc_arch_workaround_1(void) -{ - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); -} - -static void call_hvc_arch_workaround_1(void) -{ - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); -} - -static void qcom_link_stack_sanitization(void) -{ - u64 tmp; - - asm volatile("mov %0, x30 \n" - ".rept 16 \n" - "bl . + 4 \n" - ".endr \n" - "mov x30, %0 \n" - : "=&r" (tmp)); -} - -static bool __nospectre_v2; -static int __init parse_nospectre_v2(char *str) -{ - __nospectre_v2 = true; - return 0; -} -early_param("nospectre_v2", parse_nospectre_v2); - -/* - * -1: No workaround - * 0: No workaround required - * 1: Workaround installed - */ -static int detect_harden_bp_fw(void) -{ - bp_hardening_cb_t cb; - void *smccc_start, *smccc_end; - struct arm_smccc_res res; - u32 midr = read_cpuid_id(); - - arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - - switch ((int)res.a0) { - case 1: - /* Firmware says we're just fine */ - return 0; - case 0: - break; - default: - return -1; - } - - switch (arm_smccc_1_1_get_conduit()) { - case SMCCC_CONDUIT_HVC: - cb = call_hvc_arch_workaround_1; - /* This is a guest, no need to patch KVM vectors */ - smccc_start = NULL; - smccc_end = NULL; - break; - -#if IS_ENABLED(CONFIG_KVM) - case SMCCC_CONDUIT_SMC: - cb = call_smc_arch_workaround_1; - smccc_start = __smccc_workaround_1_smc; - smccc_end = __smccc_workaround_1_smc + - __SMCCC_WORKAROUND_1_SMC_SZ; - break; -#endif - - default: - return -1; - } - - if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || - ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) - cb = qcom_link_stack_sanitization; - - if (IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) - install_bp_hardening_cb(cb, smccc_start, smccc_end); - - return 1; -} - -DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); - -int ssbd_state __read_mostly = ARM64_SSBD_KERNEL; -static bool __ssb_safe = true; - -static const struct ssbd_options { - const char *str; - int state; -} ssbd_options[] = { - { "force-on", ARM64_SSBD_FORCE_ENABLE, }, - { "force-off", ARM64_SSBD_FORCE_DISABLE, }, - { "kernel", ARM64_SSBD_KERNEL, }, -}; - -static int __init ssbd_cfg(char *buf) -{ - int i; - - if (!buf || !buf[0]) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(ssbd_options); i++) { - int len = strlen(ssbd_options[i].str); - - if (strncmp(buf, ssbd_options[i].str, len)) - continue; - - ssbd_state = ssbd_options[i].state; - return 0; - } - - return -EINVAL; -} -early_param("ssbd", ssbd_cfg); - -void __init arm64_update_smccc_conduit(struct alt_instr *alt, - __le32 *origptr, __le32 *updptr, - int nr_inst) -{ - u32 insn; - - BUG_ON(nr_inst != 1); - - switch (arm_smccc_1_1_get_conduit()) { - case SMCCC_CONDUIT_HVC: - insn = aarch64_insn_get_hvc_value(); - break; - case SMCCC_CONDUIT_SMC: - insn = aarch64_insn_get_smc_value(); - break; - default: - return; - } - - *updptr = cpu_to_le32(insn); -} - -void __init arm64_enable_wa2_handling(struct alt_instr *alt, - __le32 *origptr, __le32 *updptr, - int nr_inst) -{ - BUG_ON(nr_inst != 1); - /* - * Only allow mitigation on EL1 entry/exit and guest - * ARCH_WORKAROUND_2 handling if the SSBD state allows it to - * be flipped. - */ - if (arm64_get_ssbd_state() == ARM64_SSBD_KERNEL) - *updptr = cpu_to_le32(aarch64_insn_gen_nop()); -} - -void arm64_set_ssbd_mitigation(bool state) -{ - int conduit; - - if (!IS_ENABLED(CONFIG_ARM64_SSBD)) { - pr_info_once("SSBD disabled by kernel configuration\n"); - return; - } - - if (this_cpu_has_cap(ARM64_SSBS)) { - if (state) - asm volatile(SET_PSTATE_SSBS(0)); - else - asm volatile(SET_PSTATE_SSBS(1)); - return; - } - - conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, state, - NULL); - - WARN_ON_ONCE(conduit == SMCCC_CONDUIT_NONE); -} - -static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, - int scope) -{ - struct arm_smccc_res res; - bool required = true; - s32 val; - bool this_cpu_safe = false; - int conduit; - - WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - - if (cpu_mitigations_off()) - ssbd_state = ARM64_SSBD_FORCE_DISABLE; - - /* delay setting __ssb_safe until we get a firmware response */ - if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list)) - this_cpu_safe = true; - - if (this_cpu_has_cap(ARM64_SSBS)) { - if (!this_cpu_safe) - __ssb_safe = false; - required = false; - goto out_printmsg; - } - - conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_2, &res); - - if (conduit == SMCCC_CONDUIT_NONE) { - ssbd_state = ARM64_SSBD_UNKNOWN; - if (!this_cpu_safe) - __ssb_safe = false; - return false; - } - - val = (s32)res.a0; - - switch (val) { - case SMCCC_RET_NOT_SUPPORTED: - ssbd_state = ARM64_SSBD_UNKNOWN; - if (!this_cpu_safe) - __ssb_safe = false; - return false; - - /* machines with mixed mitigation requirements must not return this */ - case SMCCC_RET_NOT_REQUIRED: - pr_info_once("%s mitigation not required\n", entry->desc); - ssbd_state = ARM64_SSBD_MITIGATED; - return false; - - case SMCCC_RET_SUCCESS: - __ssb_safe = false; - required = true; - break; - - case 1: /* Mitigation not required on this CPU */ - required = false; - break; - - default: - WARN_ON(1); - if (!this_cpu_safe) - __ssb_safe = false; - return false; - } - - switch (ssbd_state) { - case ARM64_SSBD_FORCE_DISABLE: - arm64_set_ssbd_mitigation(false); - required = false; - break; - - case ARM64_SSBD_KERNEL: - if (required) { - __this_cpu_write(arm64_ssbd_callback_required, 1); - arm64_set_ssbd_mitigation(true); - } - break; - - case ARM64_SSBD_FORCE_ENABLE: - arm64_set_ssbd_mitigation(true); - required = true; - break; - - default: - WARN_ON(1); - break; - } - -out_printmsg: - switch (ssbd_state) { - case ARM64_SSBD_FORCE_DISABLE: - pr_info_once("%s disabled from command-line\n", entry->desc); - break; - - case ARM64_SSBD_FORCE_ENABLE: - pr_info_once("%s forced from command-line\n", entry->desc); - break; - } - - return required; -} - -/* known invulnerable cores */ -static const struct midr_range arm64_ssb_cpus[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), - MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), - MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), - MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), - {}, -}; - #ifdef CONFIG_ARM64_ERRATUM_1463225 DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); @@ -519,83 +160,6 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ CAP_MIDR_RANGE_LIST(midr_list) -/* Track overall mitigation state. We are only mitigated if all cores are ok */ -static bool __hardenbp_enab = true; -static bool __spectrev2_safe = true; - -int get_spectre_v2_workaround_state(void) -{ - if (__spectrev2_safe) - return ARM64_BP_HARDEN_NOT_REQUIRED; - - if (!__hardenbp_enab) - return ARM64_BP_HARDEN_UNKNOWN; - - return ARM64_BP_HARDEN_WA_NEEDED; -} - -/* - * List of CPUs that do not need any Spectre-v2 mitigation at all. - */ -static const struct midr_range spectre_v2_safe_list[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), - MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), - MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), - MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), - MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), - { /* sentinel */ } -}; - -/* - * Track overall bp hardening for all heterogeneous cores in the machine. - * We are only considered "safe" if all booted cores are known safe. - */ -static bool __maybe_unused -check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) -{ - int need_wa; - - WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - - /* If the CPU has CSV2 set, we're safe */ - if (cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64PFR0_EL1), - ID_AA64PFR0_CSV2_SHIFT)) - return false; - - /* Alternatively, we have a list of unaffected CPUs */ - if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list)) - return false; - - /* Fallback to firmware detection */ - need_wa = detect_harden_bp_fw(); - if (!need_wa) - return false; - - __spectrev2_safe = false; - - if (!IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) { - pr_warn_once("spectrev2 mitigation disabled by kernel configuration\n"); - __hardenbp_enab = false; - return false; - } - - /* forced off */ - if (__nospectre_v2 || cpu_mitigations_off()) { - pr_info_once("spectrev2 mitigation disabled by command line option\n"); - __hardenbp_enab = false; - return false; - } - - if (need_wa < 0) { - pr_warn_once("ARM_SMCCC_ARCH_WORKAROUND_1 missing from firmware\n"); - __hardenbp_enab = false; - } - - return (need_wa > 0); -} - static const __maybe_unused struct midr_range tx2_family_cpus[] = { MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), @@ -887,9 +451,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + .desc = "Spectre-v2", + .capability = ARM64_SPECTRE_V2, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, - .matches = check_branch_predictor, + .matches = has_spectre_v2, + .cpu_enable = spectre_v2_enable_mitigation, }, #ifdef CONFIG_RANDOMIZE_BASE { @@ -899,17 +465,23 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { - .desc = "Speculative Store Bypass Disable", - .capability = ARM64_SSBD, + .desc = "Spectre-v4", + .capability = ARM64_SPECTRE_V4, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, - .matches = has_ssbd_mitigation, - .midr_range_list = arm64_ssb_cpus, + .matches = has_spectre_v4, + .cpu_enable = spectre_v4_enable_mitigation, }, #ifdef CONFIG_ARM64_ERRATUM_1418040 { .desc = "ARM erratum 1418040", .capability = ARM64_WORKAROUND_1418040, ERRATA_MIDR_RANGE_LIST(erratum_1418040_list), + /* + * We need to allow affected CPUs to come in late, but + * also need the non-affected CPUs to be able to come + * in at any point in time. Wonderful. + */ + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, }, #endif #ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT @@ -954,40 +526,3 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { } }; - -ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); -} - -ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, - char *buf) -{ - switch (get_spectre_v2_workaround_state()) { - case ARM64_BP_HARDEN_NOT_REQUIRED: - return sprintf(buf, "Not affected\n"); - case ARM64_BP_HARDEN_WA_NEEDED: - return sprintf(buf, "Mitigation: Branch predictor hardening\n"); - case ARM64_BP_HARDEN_UNKNOWN: - default: - return sprintf(buf, "Vulnerable\n"); - } -} - -ssize_t cpu_show_spec_store_bypass(struct device *dev, - struct device_attribute *attr, char *buf) -{ - if (__ssb_safe) - return sprintf(buf, "Not affected\n"); - - switch (ssbd_state) { - case ARM64_SSBD_KERNEL: - case ARM64_SSBD_FORCE_ENABLE: - if (IS_ENABLED(CONFIG_ARM64_SSBD)) - return sprintf(buf, - "Mitigation: Speculative Store Bypass disabled via prctl\n"); - } - - return sprintf(buf, "Vulnerable\n"); -} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a389b999482e..dcc165b3fc04 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -75,6 +75,7 @@ #include <asm/cpu_ops.h> #include <asm/fpsimd.h> #include <asm/mmu_context.h> +#include <asm/mte.h> #include <asm/processor.h> #include <asm/sysreg.h> #include <asm/traps.h> @@ -197,9 +198,9 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_API_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_API_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_APA_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_APA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -227,7 +228,9 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MTE_SHIFT, 4, ID_AA64PFR1_MTE_NI), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0), ARM64_FTR_END, @@ -487,7 +490,7 @@ static const struct arm64_ftr_bits ftr_id_pfr1[] = { }; static const struct arm64_ftr_bits ftr_id_pfr2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR2_SSBS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_SSBS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_CSV3_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -686,7 +689,7 @@ static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, case FTR_HIGHER_OR_ZERO_SAFE: if (!cur || !new) break; - /* Fallthrough */ + fallthrough; case FTR_HIGHER_SAFE: ret = new > cur ? new : cur; break; @@ -1111,6 +1114,7 @@ u64 read_sanitised_ftr_reg(u32 id) return 0; return regp->sys_val; } +EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg); #define read_sysreg_case(r) \ case r: return read_sysreg_s(r) @@ -1443,6 +1447,7 @@ static inline void __cpu_enable_hw_dbm(void) write_sysreg(tcr, tcr_el1); isb(); + local_flush_tlb_all(); } static bool cpu_has_broken_dbm(void) @@ -1583,48 +1588,6 @@ static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) WARN_ON(val & (7 << 27 | 7 << 21)); } -#ifdef CONFIG_ARM64_SSBD -static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) -{ - if (user_mode(regs)) - return 1; - - if (instr & BIT(PSTATE_Imm_shift)) - regs->pstate |= PSR_SSBS_BIT; - else - regs->pstate &= ~PSR_SSBS_BIT; - - arm64_skip_faulting_instruction(regs, 4); - return 0; -} - -static struct undef_hook ssbs_emulation_hook = { - .instr_mask = ~(1U << PSTATE_Imm_shift), - .instr_val = 0xd500401f | PSTATE_SSBS, - .fn = ssbs_emulation_handler, -}; - -static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) -{ - static bool undef_hook_registered = false; - static DEFINE_RAW_SPINLOCK(hook_lock); - - raw_spin_lock(&hook_lock); - if (!undef_hook_registered) { - register_undef_hook(&ssbs_emulation_hook); - undef_hook_registered = true; - } - raw_spin_unlock(&hook_lock); - - if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { - sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); - arm64_set_ssbd_mitigation(false); - } else { - arm64_set_ssbd_mitigation(true); - } -} -#endif /* CONFIG_ARM64_SSBD */ - #ifdef CONFIG_ARM64_PAN static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { @@ -1648,11 +1611,37 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) #endif /* CONFIG_ARM64_RAS_EXTN */ #ifdef CONFIG_ARM64_PTR_AUTH -static bool has_address_auth(const struct arm64_cpu_capabilities *entry, - int __unused) +static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, int scope) { - return __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_ARCH) || - __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_IMP_DEF); + int boot_val, sec_val; + + /* We don't expect to be called with SCOPE_SYSTEM */ + WARN_ON(scope == SCOPE_SYSTEM); + /* + * The ptr-auth feature levels are not intercompatible with lower + * levels. Hence we must match ptr-auth feature level of the secondary + * CPUs with that of the boot CPU. The level of boot cpu is fetched + * from the sanitised register whereas direct register read is done for + * the secondary CPUs. + * The sanitised feature state is guaranteed to match that of the + * boot CPU as a mismatched secondary CPU is parked before it gets + * a chance to update the state, with the capability. + */ + boot_val = cpuid_feature_extract_field(read_sanitised_ftr_reg(entry->sys_reg), + entry->field_pos, entry->sign); + if (scope & SCOPE_BOOT_CPU) + return boot_val >= entry->min_field_value; + /* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */ + sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg), + entry->field_pos, entry->sign); + return sec_val == boot_val; +} + +static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, + int scope) +{ + return has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH], scope) || + has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); } static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, @@ -1702,6 +1691,22 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) } #endif /* CONFIG_ARM64_BTI */ +#ifdef CONFIG_ARM64_MTE +static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) +{ + static bool cleared_zero_page = false; + + /* + * Clear the tags in the zero page. This needs to be done via the + * linear map which has the Tagged attribute. + */ + if (!cleared_zero_page) { + cleared_zero_page = true; + mte_clear_page_tags(lm_alias(empty_zero_page)); + } +} +#endif /* CONFIG_ARM64_MTE */ + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -1976,19 +1981,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64ISAR0_CRC32_SHIFT, .min_field_value = 1, }, -#ifdef CONFIG_ARM64_SSBD { .desc = "Speculative Store Bypassing Safe (SSBS)", .capability = ARM64_SSBS, - .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_SSBS_SHIFT, .sign = FTR_UNSIGNED, .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, - .cpu_enable = cpu_enable_ssbs, }, -#endif #ifdef CONFIG_ARM64_CNP { .desc = "Common not Private translations", @@ -2021,7 +2023,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_APA_SHIFT, .min_field_value = ID_AA64ISAR1_APA_ARCHITECTED, - .matches = has_cpuid_feature, + .matches = has_address_auth_cpucap, }, { .desc = "Address authentication (IMP DEF algorithm)", @@ -2031,12 +2033,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_API_SHIFT, .min_field_value = ID_AA64ISAR1_API_IMP_DEF, - .matches = has_cpuid_feature, + .matches = has_address_auth_cpucap, }, { .capability = ARM64_HAS_ADDRESS_AUTH, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, - .matches = has_address_auth, + .matches = has_address_auth_metacap, }, { .desc = "Generic authentication (architected algorithm)", @@ -2121,6 +2123,19 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, }, #endif +#ifdef CONFIG_ARM64_MTE + { + .desc = "Memory Tagging Extension", + .capability = ARM64_MTE, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_MTE_SHIFT, + .min_field_value = ID_AA64PFR1_MTE, + .sign = FTR_UNSIGNED, + .cpu_enable = cpu_enable_mte, + }, +#endif /* CONFIG_ARM64_MTE */ {}, }; @@ -2237,6 +2252,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA), HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), #endif +#ifdef CONFIG_ARM64_MTE + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), +#endif /* CONFIG_ARM64_MTE */ {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 393c6fb1f1cb..6a7bb3729d60 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -43,94 +43,93 @@ static const char *icache_policy_str[] = { unsigned long __icache_flags; static const char *const hwcap_str[] = { - "fp", - "asimd", - "evtstrm", - "aes", - "pmull", - "sha1", - "sha2", - "crc32", - "atomics", - "fphp", - "asimdhp", - "cpuid", - "asimdrdm", - "jscvt", - "fcma", - "lrcpc", - "dcpop", - "sha3", - "sm3", - "sm4", - "asimddp", - "sha512", - "sve", - "asimdfhm", - "dit", - "uscat", - "ilrcpc", - "flagm", - "ssbs", - "sb", - "paca", - "pacg", - "dcpodp", - "sve2", - "sveaes", - "svepmull", - "svebitperm", - "svesha3", - "svesm4", - "flagm2", - "frint", - "svei8mm", - "svef32mm", - "svef64mm", - "svebf16", - "i8mm", - "bf16", - "dgh", - "rng", - "bti", - /* reserved for "mte" */ - NULL + [KERNEL_HWCAP_FP] = "fp", + [KERNEL_HWCAP_ASIMD] = "asimd", + [KERNEL_HWCAP_EVTSTRM] = "evtstrm", + [KERNEL_HWCAP_AES] = "aes", + [KERNEL_HWCAP_PMULL] = "pmull", + [KERNEL_HWCAP_SHA1] = "sha1", + [KERNEL_HWCAP_SHA2] = "sha2", + [KERNEL_HWCAP_CRC32] = "crc32", + [KERNEL_HWCAP_ATOMICS] = "atomics", + [KERNEL_HWCAP_FPHP] = "fphp", + [KERNEL_HWCAP_ASIMDHP] = "asimdhp", + [KERNEL_HWCAP_CPUID] = "cpuid", + [KERNEL_HWCAP_ASIMDRDM] = "asimdrdm", + [KERNEL_HWCAP_JSCVT] = "jscvt", + [KERNEL_HWCAP_FCMA] = "fcma", + [KERNEL_HWCAP_LRCPC] = "lrcpc", + [KERNEL_HWCAP_DCPOP] = "dcpop", + [KERNEL_HWCAP_SHA3] = "sha3", + [KERNEL_HWCAP_SM3] = "sm3", + [KERNEL_HWCAP_SM4] = "sm4", + [KERNEL_HWCAP_ASIMDDP] = "asimddp", + [KERNEL_HWCAP_SHA512] = "sha512", + [KERNEL_HWCAP_SVE] = "sve", + [KERNEL_HWCAP_ASIMDFHM] = "asimdfhm", + [KERNEL_HWCAP_DIT] = "dit", + [KERNEL_HWCAP_USCAT] = "uscat", + [KERNEL_HWCAP_ILRCPC] = "ilrcpc", + [KERNEL_HWCAP_FLAGM] = "flagm", + [KERNEL_HWCAP_SSBS] = "ssbs", + [KERNEL_HWCAP_SB] = "sb", + [KERNEL_HWCAP_PACA] = "paca", + [KERNEL_HWCAP_PACG] = "pacg", + [KERNEL_HWCAP_DCPODP] = "dcpodp", + [KERNEL_HWCAP_SVE2] = "sve2", + [KERNEL_HWCAP_SVEAES] = "sveaes", + [KERNEL_HWCAP_SVEPMULL] = "svepmull", + [KERNEL_HWCAP_SVEBITPERM] = "svebitperm", + [KERNEL_HWCAP_SVESHA3] = "svesha3", + [KERNEL_HWCAP_SVESM4] = "svesm4", + [KERNEL_HWCAP_FLAGM2] = "flagm2", + [KERNEL_HWCAP_FRINT] = "frint", + [KERNEL_HWCAP_SVEI8MM] = "svei8mm", + [KERNEL_HWCAP_SVEF32MM] = "svef32mm", + [KERNEL_HWCAP_SVEF64MM] = "svef64mm", + [KERNEL_HWCAP_SVEBF16] = "svebf16", + [KERNEL_HWCAP_I8MM] = "i8mm", + [KERNEL_HWCAP_BF16] = "bf16", + [KERNEL_HWCAP_DGH] = "dgh", + [KERNEL_HWCAP_RNG] = "rng", + [KERNEL_HWCAP_BTI] = "bti", + [KERNEL_HWCAP_MTE] = "mte", }; #ifdef CONFIG_COMPAT +#define COMPAT_KERNEL_HWCAP(x) const_ilog2(COMPAT_HWCAP_ ## x) static const char *const compat_hwcap_str[] = { - "swp", - "half", - "thumb", - "26bit", - "fastmult", - "fpa", - "vfp", - "edsp", - "java", - "iwmmxt", - "crunch", - "thumbee", - "neon", - "vfpv3", - "vfpv3d16", - "tls", - "vfpv4", - "idiva", - "idivt", - "vfpd32", - "lpae", - "evtstrm", - NULL + [COMPAT_KERNEL_HWCAP(SWP)] = "swp", + [COMPAT_KERNEL_HWCAP(HALF)] = "half", + [COMPAT_KERNEL_HWCAP(THUMB)] = "thumb", + [COMPAT_KERNEL_HWCAP(26BIT)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(FAST_MULT)] = "fastmult", + [COMPAT_KERNEL_HWCAP(FPA)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(VFP)] = "vfp", + [COMPAT_KERNEL_HWCAP(EDSP)] = "edsp", + [COMPAT_KERNEL_HWCAP(JAVA)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(IWMMXT)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(CRUNCH)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(THUMBEE)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(NEON)] = "neon", + [COMPAT_KERNEL_HWCAP(VFPv3)] = "vfpv3", + [COMPAT_KERNEL_HWCAP(VFPV3D16)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(TLS)] = "tls", + [COMPAT_KERNEL_HWCAP(VFPv4)] = "vfpv4", + [COMPAT_KERNEL_HWCAP(IDIVA)] = "idiva", + [COMPAT_KERNEL_HWCAP(IDIVT)] = "idivt", + [COMPAT_KERNEL_HWCAP(VFPD32)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(LPAE)] = "lpae", + [COMPAT_KERNEL_HWCAP(EVTSTRM)] = "evtstrm", }; +#define COMPAT_KERNEL_HWCAP2(x) const_ilog2(COMPAT_HWCAP2_ ## x) static const char *const compat_hwcap2_str[] = { - "aes", - "pmull", - "sha1", - "sha2", - "crc32", - NULL + [COMPAT_KERNEL_HWCAP2(AES)] = "aes", + [COMPAT_KERNEL_HWCAP2(PMULL)] = "pmull", + [COMPAT_KERNEL_HWCAP2(SHA1)] = "sha1", + [COMPAT_KERNEL_HWCAP2(SHA2)] = "sha2", + [COMPAT_KERNEL_HWCAP2(CRC32)] = "crc32", }; #endif /* CONFIG_COMPAT */ @@ -166,16 +165,25 @@ static int c_show(struct seq_file *m, void *v) seq_puts(m, "Features\t:"); if (compat) { #ifdef CONFIG_COMPAT - for (j = 0; compat_hwcap_str[j]; j++) - if (compat_elf_hwcap & (1 << j)) + for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) { + if (compat_elf_hwcap & (1 << j)) { + /* + * Warn once if any feature should not + * have been present on arm64 platform. + */ + if (WARN_ON_ONCE(!compat_hwcap_str[j])) + continue; + seq_printf(m, " %s", compat_hwcap_str[j]); + } + } - for (j = 0; compat_hwcap2_str[j]; j++) + for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++) if (compat_elf_hwcap2 & (1 << j)) seq_printf(m, " %s", compat_hwcap2_str[j]); #endif /* CONFIG_COMPAT */ } else { - for (j = 0; hwcap_str[j]; j++) + for (j = 0; j < ARRAY_SIZE(hwcap_str); j++) if (cpu_have_feature(j)) seq_printf(m, " %s", hwcap_str[j]); } @@ -327,7 +335,6 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) set_bit(ICACHEF_VPIPT, &__icache_flags); break; default: - /* Fallthrough */ case ICACHE_POLICY_VIPT: /* Assume aliasing */ set_bit(ICACHEF_ALIASING, &__icache_flags); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 7310a4f7f993..fa76151de6ff 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -384,7 +384,7 @@ void __init debug_traps_init(void) hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, TRAP_TRACE, "single-step handler"); hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, - TRAP_BRKPT, "ptrace BRK handler"); + TRAP_BRKPT, "BRK handler"); } /* Re-enable single step for syscall restarting. */ diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index d3be9dbf5490..43d4c329775f 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -66,6 +66,13 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) } NOKPROBE_SYMBOL(el1_dbg); +static void notrace el1_fpac(struct pt_regs *regs, unsigned long esr) +{ + local_daif_inherit(regs); + do_ptrauth_fault(regs, esr); +} +NOKPROBE_SYMBOL(el1_fpac); + asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -92,6 +99,9 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_BRK64: el1_dbg(regs, esr); break; + case ESR_ELx_EC_FPAC: + el1_fpac(regs, esr); + break; default: el1_inv(regs, esr); } @@ -227,6 +237,14 @@ static void notrace el0_svc(struct pt_regs *regs) } NOKPROBE_SYMBOL(el0_svc); +static void notrace el0_fpac(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_ptrauth_fault(regs, esr); +} +NOKPROBE_SYMBOL(el0_fpac); + asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -272,6 +290,9 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_BRK64: el0_dbg(regs, esr); break; + case ESR_ELx_EC_FPAC: + el0_fpac(regs, esr); + break; default: el0_inv(regs, esr); } diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index f880dd63ddc3..2ca395c25448 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -32,6 +32,7 @@ SYM_FUNC_START(fpsimd_load_state) SYM_FUNC_END(fpsimd_load_state) #ifdef CONFIG_ARM64_SVE + SYM_FUNC_START(sve_save_state) sve_save 0, x1, 2 ret @@ -46,4 +47,28 @@ SYM_FUNC_START(sve_get_vl) _sve_rdvl 0, 1 ret SYM_FUNC_END(sve_get_vl) + +/* + * Load SVE state from FPSIMD state. + * + * x0 = pointer to struct fpsimd_state + * x1 = VQ - 1 + * + * Each SVE vector will be loaded with the first 128-bits taken from FPSIMD + * and the rest zeroed. All the other SVE registers will be zeroed. + */ +SYM_FUNC_START(sve_load_from_fpsimd_state) + sve_load_vq x1, x2, x3 + fpsimd_restore x0, 8 + _for n, 0, 15, _sve_pfalse \n + _sve_wrffr 0 + ret +SYM_FUNC_END(sve_load_from_fpsimd_state) + +/* Zero all SVE registers but the first 128-bits of each vector */ +SYM_FUNC_START(sve_flush_live) + sve_flush + ret +SYM_FUNC_END(sve_flush_live) + #endif /* CONFIG_ARM64_SVE */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2646178c8329..f30007dff35f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -132,9 +132,8 @@ alternative_else_nop_endif * them if required. */ .macro apply_ssbd, state, tmp1, tmp2 -#ifdef CONFIG_ARM64_SSBD -alternative_cb arm64_enable_wa2_handling - b .L__asm_ssbd_skip\@ +alternative_cb spectre_v4_patch_fw_mitigation_enable + b .L__asm_ssbd_skip\@ // Patched to NOP alternative_cb_end ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 cbz \tmp2, .L__asm_ssbd_skip\@ @@ -142,10 +141,35 @@ alternative_cb_end tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 mov w1, #\state -alternative_cb arm64_update_smccc_conduit +alternative_cb spectre_v4_patch_fw_mitigation_conduit nop // Patched to SMC/HVC #0 alternative_cb_end .L__asm_ssbd_skip\@: + .endm + + /* Check for MTE asynchronous tag check faults */ + .macro check_mte_async_tcf, flgs, tmp +#ifdef CONFIG_ARM64_MTE +alternative_if_not ARM64_MTE + b 1f +alternative_else_nop_endif + mrs_s \tmp, SYS_TFSRE0_EL1 + tbz \tmp, #SYS_TFSR_EL1_TF0_SHIFT, 1f + /* Asynchronous TCF occurred for TTBR0 access, set the TI flag */ + orr \flgs, \flgs, #_TIF_MTE_ASYNC_FAULT + str \flgs, [tsk, #TSK_TI_FLAGS] + msr_s SYS_TFSRE0_EL1, xzr +1: +#endif + .endm + + /* Clear the MTE asynchronous tag check faults */ + .macro clear_mte_async_tcf +#ifdef CONFIG_ARM64_MTE +alternative_if ARM64_MTE + dsb ish + msr_s SYS_TFSRE0_EL1, xzr +alternative_else_nop_endif #endif .endm @@ -170,19 +194,6 @@ alternative_cb_end stp x28, x29, [sp, #16 * 14] .if \el == 0 - .if \regsize == 32 - /* - * If we're returning from a 32-bit task on a system affected by - * 1418040 then re-enable userspace access to the virtual counter. - */ -#ifdef CONFIG_ARM64_ERRATUM_1418040 -alternative_if ARM64_WORKAROUND_1418040 - mrs x0, cntkctl_el1 - orr x0, x0, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN - msr cntkctl_el1, x0 -alternative_else_nop_endif -#endif - .endif clear_gp_regs mrs x21, sp_el0 ldr_this_cpu tsk, __entry_task, x20 @@ -195,6 +206,8 @@ alternative_else_nop_endif ldr x19, [tsk, #TSK_TI_FLAGS] disable_step_tsk x19, x20 + /* Check for asynchronous tag check faults in user space */ + check_mte_async_tcf x19, x22 apply_ssbd 1, x22, x23 ptrauth_keys_install_kernel tsk, x20, x22, x23 @@ -246,6 +259,13 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING str x20, [sp, #S_PMR_SAVE] alternative_else_nop_endif + /* Re-enable tag checking (TCO set on exception entry) */ +#ifdef CONFIG_ARM64_MTE +alternative_if ARM64_MTE + SET_PSTATE_TCO(0) +alternative_else_nop_endif +#endif + /* * Registers that may be useful after this macro is invoked: * @@ -294,14 +314,6 @@ alternative_else_nop_endif tst x22, #PSR_MODE32_BIT // native task? b.eq 3f -#ifdef CONFIG_ARM64_ERRATUM_1418040 -alternative_if ARM64_WORKAROUND_1418040 - mrs x0, cntkctl_el1 - bic x0, x0, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN - msr cntkctl_el1, x0 -alternative_else_nop_endif -#endif - #ifdef CONFIG_ARM64_ERRATUM_845719 alternative_if ARM64_WORKAROUND_845719 #ifdef CONFIG_PID_IN_CONTEXTIDR @@ -718,11 +730,9 @@ el0_irq_naked: bl trace_hardirqs_off #endif -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR tbz x22, #55, 1f bl do_el0_irq_bp_hardening 1: -#endif irq_handler #ifdef CONFIG_TRACE_IRQFLAGS @@ -765,6 +775,8 @@ SYM_CODE_START_LOCAL(ret_to_user) and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: + /* Ignore asynchronous tag check faults in the uaccess routines */ + clear_mte_async_tcf enable_step_tsk x1, x2 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK bl stackleak_erase diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 55c8f3ec6705..a6d688c10745 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -32,9 +32,11 @@ #include <linux/swab.h> #include <asm/esr.h> +#include <asm/exception.h> #include <asm/fpsimd.h> #include <asm/cpufeature.h> #include <asm/cputype.h> +#include <asm/neon.h> #include <asm/processor.h> #include <asm/simd.h> #include <asm/sigcontext.h> @@ -312,7 +314,7 @@ static void fpsimd_save(void) * re-enter user with corrupt state. * There's no way to recover, so kill it: */ - force_signal_inject(SIGKILL, SI_KERNEL, 0); + force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); return; } @@ -928,7 +930,7 @@ void fpsimd_release_task(struct task_struct *dead_task) * the SVE access trap will be disabled the next time this task * reaches ret_to_user. * - * TIF_SVE should be clear on entry: otherwise, task_fpsimd_load() + * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state() * would have disabled the SVE access trap for userspace during * ret_to_user, making an SVE access trap impossible in that case. */ @@ -936,7 +938,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) { /* Even if we chose not to use SVE, the hardware could still trap: */ if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) { - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); return; } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 037421c66b14..d8d9caf02834 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -36,14 +36,10 @@ #include "efi-header.S" -#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) +#define __PHYS_OFFSET KERNEL_START -#if (TEXT_OFFSET & 0xfff) != 0 -#error TEXT_OFFSET must be at least 4KB aligned -#elif (PAGE_OFFSET & 0x1fffff) != 0 +#if (PAGE_OFFSET & 0x1fffff) != 0 #error PAGE_OFFSET must be at least 2MB aligned -#elif TEXT_OFFSET > 0x1fffff -#error TEXT_OFFSET must be less than 2MB #endif /* @@ -55,7 +51,7 @@ * x0 = physical address to the FDT blob. * * This code is mostly position independent so you call this at - * __pa(PAGE_OFFSET + TEXT_OFFSET). + * __pa(PAGE_OFFSET). * * Note that the callee-saved registers are used for storing variables * that are useful before the MMU is enabled. The allocations are described @@ -77,7 +73,7 @@ _head: b primary_entry // branch to kernel start, magic .long 0 // reserved #endif - le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian + .quad 0 // Image load offset from start of RAM, little-endian le64sym _kernel_size_le // Effective size of kernel image, little-endian le64sym _kernel_flags_le // Informative flags, little-endian .quad 0 // reserved @@ -382,7 +378,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) * Map the kernel image (starting with PHYS_OFFSET). */ adrp x0, init_pg_dir - mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) + mov_q x5, KIMAGE_VADDR // compile time __va(_text) add x5, x5, x23 // add KASLR displacement mov x4, PTRS_PER_PGD adrp x6, _end // runtime __pa(_end) @@ -474,7 +470,7 @@ SYM_FUNC_END(__primary_switched) .pushsection ".rodata", "a" SYM_DATA_START(kimage_vaddr) - .quad _text - TEXT_OFFSET + .quad _text SYM_DATA_END(kimage_vaddr) EXPORT_SYMBOL(kimage_vaddr) .popsection diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 68e14152d6e9..42003774d261 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -21,7 +21,6 @@ #include <linux/sched.h> #include <linux/suspend.h> #include <linux/utsname.h> -#include <linux/version.h> #include <asm/barrier.h> #include <asm/cacheflush.h> @@ -31,6 +30,7 @@ #include <asm/kexec.h> #include <asm/memory.h> #include <asm/mmu_context.h> +#include <asm/mte.h> #include <asm/pgalloc.h> #include <asm/pgtable-hwdef.h> #include <asm/sections.h> @@ -285,6 +285,117 @@ static int create_safe_exec_page(void *src_start, size_t length, #define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) +#ifdef CONFIG_ARM64_MTE + +static DEFINE_XARRAY(mte_pages); + +static int save_tags(struct page *page, unsigned long pfn) +{ + void *tag_storage, *ret; + + tag_storage = mte_allocate_tag_storage(); + if (!tag_storage) + return -ENOMEM; + + mte_save_page_tags(page_address(page), tag_storage); + + ret = xa_store(&mte_pages, pfn, tag_storage, GFP_KERNEL); + if (WARN(xa_is_err(ret), "Failed to store MTE tags")) { + mte_free_tag_storage(tag_storage); + return xa_err(ret); + } else if (WARN(ret, "swsusp: %s: Duplicate entry", __func__)) { + mte_free_tag_storage(ret); + } + + return 0; +} + +static void swsusp_mte_free_storage(void) +{ + XA_STATE(xa_state, &mte_pages, 0); + void *tags; + + xa_lock(&mte_pages); + xas_for_each(&xa_state, tags, ULONG_MAX) { + mte_free_tag_storage(tags); + } + xa_unlock(&mte_pages); + + xa_destroy(&mte_pages); +} + +static int swsusp_mte_save_tags(void) +{ + struct zone *zone; + unsigned long pfn, max_zone_pfn; + int ret = 0; + int n = 0; + + if (!system_supports_mte()) + return 0; + + for_each_populated_zone(zone) { + max_zone_pfn = zone_end_pfn(zone); + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) { + struct page *page = pfn_to_online_page(pfn); + + if (!page) + continue; + + if (!test_bit(PG_mte_tagged, &page->flags)) + continue; + + ret = save_tags(page, pfn); + if (ret) { + swsusp_mte_free_storage(); + goto out; + } + + n++; + } + } + pr_info("Saved %d MTE pages\n", n); + +out: + return ret; +} + +static void swsusp_mte_restore_tags(void) +{ + XA_STATE(xa_state, &mte_pages, 0); + int n = 0; + void *tags; + + xa_lock(&mte_pages); + xas_for_each(&xa_state, tags, ULONG_MAX) { + unsigned long pfn = xa_state.xa_index; + struct page *page = pfn_to_online_page(pfn); + + mte_restore_page_tags(page_address(page), tags); + + mte_free_tag_storage(tags); + n++; + } + xa_unlock(&mte_pages); + + pr_info("Restored %d MTE pages\n", n); + + xa_destroy(&mte_pages); +} + +#else /* CONFIG_ARM64_MTE */ + +static int swsusp_mte_save_tags(void) +{ + return 0; +} + +static void swsusp_mte_restore_tags(void) +{ +} + +#endif /* CONFIG_ARM64_MTE */ + int swsusp_arch_suspend(void) { int ret = 0; @@ -302,6 +413,10 @@ int swsusp_arch_suspend(void) /* make the crash dump kernel image visible/saveable */ crash_prepare_suspend(); + ret = swsusp_mte_save_tags(); + if (ret) + return ret; + sleep_cpu = smp_processor_id(); ret = swsusp_save(); } else { @@ -315,6 +430,8 @@ int swsusp_arch_suspend(void) dcache_clean_range(__hyp_text_start, __hyp_text_end); } + swsusp_mte_restore_tags(); + /* make the crash dump kernel image protected again */ crash_post_resume(); @@ -332,11 +449,7 @@ int swsusp_arch_suspend(void) * mitigation off behind our back, let's set the state * to what we expect it to be. */ - switch (arm64_get_ssbd_state()) { - case ARM64_SSBD_FORCE_ENABLE: - case ARM64_SSBD_KERNEL: - arm64_set_ssbd_mitigation(true); - } + spectre_v4_enable_mitigation(NULL); } local_daif_restore(flags); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index af234a1e08b7..712e97c03e54 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -257,7 +257,7 @@ static int hw_breakpoint_control(struct perf_event *bp, * level. */ enable_debug_monitors(dbg_el); - /* Fall through */ + fallthrough; case HW_BREAKPOINT_RESTORE: /* Setup the address register. */ write_wb_reg(val_reg, i, info->address); @@ -541,13 +541,13 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, if (hw->ctrl.len == ARM_BREAKPOINT_LEN_2) break; - /* Fallthrough */ + fallthrough; case 3: /* Allow single byte watchpoint. */ if (hw->ctrl.len == ARM_BREAKPOINT_LEN_1) break; - /* Fallthrough */ + fallthrough; default: return -EINVAL; } diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 9e897c500237..d0f3f35dd0d7 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -64,12 +64,10 @@ __efistub__ctype = _ctype; #define KVM_NVHE_ALIAS(sym) __kvm_nvhe_##sym = sym; /* Alternative callbacks for init-time patching of nVHE hyp code. */ -KVM_NVHE_ALIAS(arm64_enable_wa2_handling); KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); /* Global kernel state accessed by nVHE hyp code. */ -KVM_NVHE_ALIAS(arm64_ssbd_callback_required); KVM_NVHE_ALIAS(kvm_host_data); KVM_NVHE_ALIAS(kvm_vgic_global_state); @@ -103,6 +101,10 @@ KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); KVM_NVHE_ALIAS(gic_pmr_sync); #endif +/* EL2 exception handling */ +KVM_NVHE_ALIAS(__start___kvm_ex_table); +KVM_NVHE_ALIAS(__stop___kvm_ex_table); + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index c7d38c660372..7bc3ba897901 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -62,7 +62,6 @@ */ #define HEAD_SYMBOLS \ DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text); \ - DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \ DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS); #endif /* __ARM64_KERNEL_IMAGE_H */ diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index a107375005bc..6c0de2f60ea9 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -60,16 +60,10 @@ bool __kprobes aarch64_insn_is_steppable_hint(u32 insn) case AARCH64_INSN_HINT_XPACLRI: case AARCH64_INSN_HINT_PACIA_1716: case AARCH64_INSN_HINT_PACIB_1716: - case AARCH64_INSN_HINT_AUTIA_1716: - case AARCH64_INSN_HINT_AUTIB_1716: case AARCH64_INSN_HINT_PACIAZ: case AARCH64_INSN_HINT_PACIASP: case AARCH64_INSN_HINT_PACIBZ: case AARCH64_INSN_HINT_PACIBSP: - case AARCH64_INSN_HINT_AUTIAZ: - case AARCH64_INSN_HINT_AUTIASP: - case AARCH64_INSN_HINT_AUTIBZ: - case AARCH64_INSN_HINT_AUTIBSP: case AARCH64_INSN_HINT_BTI: case AARCH64_INSN_HINT_BTIC: case AARCH64_INSN_HINT_BTIJ: @@ -176,7 +170,7 @@ bool __kprobes aarch64_insn_uses_literal(u32 insn) bool __kprobes aarch64_insn_is_branch(u32 insn) { - /* b, bl, cb*, tb*, b.cond, br, blr */ + /* b, bl, cb*, tb*, ret*, b.cond, br*, blr* */ return aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn) || @@ -185,8 +179,11 @@ bool __kprobes aarch64_insn_is_branch(u32 insn) aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn) || aarch64_insn_is_ret(insn) || + aarch64_insn_is_ret_auth(insn) || aarch64_insn_is_br(insn) || + aarch64_insn_is_br_auth(insn) || aarch64_insn_is_blr(insn) || + aarch64_insn_is_blr_auth(insn) || aarch64_insn_is_bcond(insn); } diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 0ce3a28e3347..2e224435c024 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -305,8 +305,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.core.plt_shndx = i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) mod->arch.init.plt_shndx = i; - else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && - !strcmp(secstrings + sechdrs[i].sh_name, + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".text.ftrace_trampoline")) tramp = sechdrs + i; else if (sechdrs[i].sh_type == SHT_SYMTAB) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 1cd1a4d0ed30..2a1ad95d9b2c 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -315,21 +315,21 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* MOVW instruction relocations. */ case R_AARCH64_MOVW_UABS_G0_NC: overflow_check = false; - /* Fall through */ + fallthrough; case R_AARCH64_MOVW_UABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G1_NC: overflow_check = false; - /* Fall through */ + fallthrough; case R_AARCH64_MOVW_UABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G2_NC: overflow_check = false; - /* Fall through */ + fallthrough; case R_AARCH64_MOVW_UABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, AARCH64_INSN_IMM_MOVKZ); @@ -397,7 +397,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, break; case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; - /* Fall through */ + fallthrough; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_adrp(me, sechdrs, loc, val); if (ovf && ovf != -ERANGE) diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c new file mode 100644 index 000000000000..52a0638ed967 --- /dev/null +++ b/arch/arm64/kernel/mte.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 ARM Ltd. + */ + +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/prctl.h> +#include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/string.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/thread_info.h> +#include <linux/uio.h> + +#include <asm/cpufeature.h> +#include <asm/mte.h> +#include <asm/ptrace.h> +#include <asm/sysreg.h> + +static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) +{ + pte_t old_pte = READ_ONCE(*ptep); + + if (check_swap && is_swap_pte(old_pte)) { + swp_entry_t entry = pte_to_swp_entry(old_pte); + + if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) + return; + } + + mte_clear_page_tags(page_address(page)); +} + +void mte_sync_tags(pte_t *ptep, pte_t pte) +{ + struct page *page = pte_page(pte); + long i, nr_pages = compound_nr(page); + bool check_swap = nr_pages == 1; + + /* if PG_mte_tagged is set, tags have already been initialised */ + for (i = 0; i < nr_pages; i++, page++) { + if (!test_and_set_bit(PG_mte_tagged, &page->flags)) + mte_sync_page_tags(page, ptep, check_swap); + } +} + +int memcmp_pages(struct page *page1, struct page *page2) +{ + char *addr1, *addr2; + int ret; + + addr1 = page_address(page1); + addr2 = page_address(page2); + ret = memcmp(addr1, addr2, PAGE_SIZE); + + if (!system_supports_mte() || ret) + return ret; + + /* + * If the page content is identical but at least one of the pages is + * tagged, return non-zero to avoid KSM merging. If only one of the + * pages is tagged, set_pte_at() may zero or change the tags of the + * other page via mte_sync_tags(). + */ + if (test_bit(PG_mte_tagged, &page1->flags) || + test_bit(PG_mte_tagged, &page2->flags)) + return addr1 != addr2; + + return ret; +} + +static void update_sctlr_el1_tcf0(u64 tcf0) +{ + /* ISB required for the kernel uaccess routines */ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0); + isb(); +} + +static void set_sctlr_el1_tcf0(u64 tcf0) +{ + /* + * mte_thread_switch() checks current->thread.sctlr_tcf0 as an + * optimisation. Disable preemption so that it does not see + * the variable update before the SCTLR_EL1.TCF0 one. + */ + preempt_disable(); + current->thread.sctlr_tcf0 = tcf0; + update_sctlr_el1_tcf0(tcf0); + preempt_enable(); +} + +static void update_gcr_el1_excl(u64 incl) +{ + u64 excl = ~incl & SYS_GCR_EL1_EXCL_MASK; + + /* + * Note that 'incl' is an include mask (controlled by the user via + * prctl()) while GCR_EL1 accepts an exclude mask. + * No need for ISB since this only affects EL0 currently, implicit + * with ERET. + */ + sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl); +} + +static void set_gcr_el1_excl(u64 incl) +{ + current->thread.gcr_user_incl = incl; + update_gcr_el1_excl(incl); +} + +void flush_mte_state(void) +{ + if (!system_supports_mte()) + return; + + /* clear any pending asynchronous tag fault */ + dsb(ish); + write_sysreg_s(0, SYS_TFSRE0_EL1); + clear_thread_flag(TIF_MTE_ASYNC_FAULT); + /* disable tag checking */ + set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE); + /* reset tag generation mask */ + set_gcr_el1_excl(0); +} + +void mte_thread_switch(struct task_struct *next) +{ + if (!system_supports_mte()) + return; + + /* avoid expensive SCTLR_EL1 accesses if no change */ + if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0) + update_sctlr_el1_tcf0(next->thread.sctlr_tcf0); + update_gcr_el1_excl(next->thread.gcr_user_incl); +} + +void mte_suspend_exit(void) +{ + if (!system_supports_mte()) + return; + + update_gcr_el1_excl(current->thread.gcr_user_incl); +} + +long set_mte_ctrl(struct task_struct *task, unsigned long arg) +{ + u64 tcf0; + u64 gcr_incl = (arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT; + + if (!system_supports_mte()) + return 0; + + switch (arg & PR_MTE_TCF_MASK) { + case PR_MTE_TCF_NONE: + tcf0 = SCTLR_EL1_TCF0_NONE; + break; + case PR_MTE_TCF_SYNC: + tcf0 = SCTLR_EL1_TCF0_SYNC; + break; + case PR_MTE_TCF_ASYNC: + tcf0 = SCTLR_EL1_TCF0_ASYNC; + break; + default: + return -EINVAL; + } + + if (task != current) { + task->thread.sctlr_tcf0 = tcf0; + task->thread.gcr_user_incl = gcr_incl; + } else { + set_sctlr_el1_tcf0(tcf0); + set_gcr_el1_excl(gcr_incl); + } + + return 0; +} + +long get_mte_ctrl(struct task_struct *task) +{ + unsigned long ret; + + if (!system_supports_mte()) + return 0; + + ret = task->thread.gcr_user_incl << PR_MTE_TAG_SHIFT; + + switch (task->thread.sctlr_tcf0) { + case SCTLR_EL1_TCF0_NONE: + return PR_MTE_TCF_NONE; + case SCTLR_EL1_TCF0_SYNC: + ret |= PR_MTE_TCF_SYNC; + break; + case SCTLR_EL1_TCF0_ASYNC: + ret |= PR_MTE_TCF_ASYNC; + break; + } + + return ret; +} + +/* + * Access MTE tags in another process' address space as given in mm. Update + * the number of tags copied. Return 0 if any tags copied, error otherwise. + * Inspired by __access_remote_vm(). + */ +static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, + struct iovec *kiov, unsigned int gup_flags) +{ + struct vm_area_struct *vma; + void __user *buf = kiov->iov_base; + size_t len = kiov->iov_len; + int ret; + int write = gup_flags & FOLL_WRITE; + + if (!access_ok(buf, len)) + return -EFAULT; + + if (mmap_read_lock_killable(mm)) + return -EIO; + + while (len) { + unsigned long tags, offset; + void *maddr; + struct page *page = NULL; + + ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page, + &vma, NULL); + if (ret <= 0) + break; + + /* + * Only copy tags if the page has been mapped as PROT_MTE + * (PG_mte_tagged set). Otherwise the tags are not valid and + * not accessible to user. Moreover, an mprotect(PROT_MTE) + * would cause the existing tags to be cleared if the page + * was never mapped with PROT_MTE. + */ + if (!test_bit(PG_mte_tagged, &page->flags)) { + ret = -EOPNOTSUPP; + put_page(page); + break; + } + + /* limit access to the end of the page */ + offset = offset_in_page(addr); + tags = min(len, (PAGE_SIZE - offset) / MTE_GRANULE_SIZE); + + maddr = page_address(page); + if (write) { + tags = mte_copy_tags_from_user(maddr + offset, buf, tags); + set_page_dirty_lock(page); + } else { + tags = mte_copy_tags_to_user(buf, maddr + offset, tags); + } + put_page(page); + + /* error accessing the tracer's buffer */ + if (!tags) + break; + + len -= tags; + buf += tags; + addr += tags * MTE_GRANULE_SIZE; + } + mmap_read_unlock(mm); + + /* return an error if no tags copied */ + kiov->iov_len = buf - kiov->iov_base; + if (!kiov->iov_len) { + /* check for error accessing the tracee's address space */ + if (ret <= 0) + return -EIO; + else + return -EFAULT; + } + + return 0; +} + +/* + * Copy MTE tags in another process' address space at 'addr' to/from tracer's + * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm(). + */ +static int access_remote_tags(struct task_struct *tsk, unsigned long addr, + struct iovec *kiov, unsigned int gup_flags) +{ + struct mm_struct *mm; + int ret; + + mm = get_task_mm(tsk); + if (!mm) + return -EPERM; + + if (!tsk->ptrace || (current != tsk->parent) || + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptracer_capable(tsk, mm->user_ns))) { + mmput(mm); + return -EPERM; + } + + ret = __access_remote_tags(mm, addr, kiov, gup_flags); + mmput(mm); + + return ret; +} + +int mte_ptrace_copy_tags(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int ret; + struct iovec kiov; + struct iovec __user *uiov = (void __user *)data; + unsigned int gup_flags = FOLL_FORCE; + + if (!system_supports_mte()) + return -EIO; + + if (get_user(kiov.iov_base, &uiov->iov_base) || + get_user(kiov.iov_len, &uiov->iov_len)) + return -EFAULT; + + if (request == PTRACE_POKEMTETAGS) + gup_flags |= FOLL_WRITE; + + /* align addr to the MTE tag granule */ + addr &= MTE_GRANULE_MASK; + + ret = access_remote_tags(child, addr, &kiov, gup_flags); + if (!ret) + ret = put_user(kiov.iov_len, &uiov->iov_len); + + return ret; +} diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 295d66490584..c07d7a034941 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -50,16 +50,19 @@ static u64 pv_steal_clock(int cpu) struct pv_time_stolen_time_region *reg; reg = per_cpu_ptr(&stolen_time_region, cpu); - if (!reg->kaddr) { - pr_warn_once("stolen time enabled but not configured for cpu %d\n", - cpu); + + /* + * paravirt_steal_clock() may be called before the CPU + * online notification callback runs. Until the callback + * has run we just return zero. + */ + if (!reg->kaddr) return 0; - } return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time)); } -static int stolen_time_dying_cpu(unsigned int cpu) +static int stolen_time_cpu_down_prepare(unsigned int cpu) { struct pv_time_stolen_time_region *reg; @@ -73,7 +76,7 @@ static int stolen_time_dying_cpu(unsigned int cpu) return 0; } -static int init_stolen_time_cpu(unsigned int cpu) +static int stolen_time_cpu_online(unsigned int cpu) { struct pv_time_stolen_time_region *reg; struct arm_smccc_res res; @@ -103,19 +106,20 @@ static int init_stolen_time_cpu(unsigned int cpu) return 0; } -static int pv_time_init_stolen_time(void) +static int __init pv_time_init_stolen_time(void) { int ret; - ret = cpuhp_setup_state(CPUHP_AP_ARM_KVMPV_STARTING, - "hypervisor/arm/pvtime:starting", - init_stolen_time_cpu, stolen_time_dying_cpu); + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "hypervisor/arm/pvtime:online", + stolen_time_cpu_online, + stolen_time_cpu_down_prepare); if (ret < 0) return ret; return 0; } -static bool has_pv_steal_clock(void) +static bool __init has_pv_steal_clock(void) { struct arm_smccc_res res; diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index b0e03e052dd1..88ff471b0bce 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -137,11 +137,11 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, * whist unwinding the stackframe and is like a subroutine return so we use * the PC. */ -static int callchain_trace(struct stackframe *frame, void *data) +static bool callchain_trace(void *data, unsigned long pc) { struct perf_callchain_entry_ctx *entry = data; - perf_callchain_store(entry, frame->pc); - return 0; + perf_callchain_store(entry, pc); + return true; } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 462f9a9cc44b..3605f77ad4df 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -69,6 +69,9 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD, + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_RD, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, }; @@ -302,13 +305,33 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { .attrs = armv8_pmuv3_format_attrs, }; +static ssize_t slots_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK; + + return snprintf(page, PAGE_SIZE, "0x%08x\n", slots); +} + +static DEVICE_ATTR_RO(slots); + +static struct attribute *armv8_pmuv3_caps_attrs[] = { + &dev_attr_slots.attr, + NULL, +}; + +static struct attribute_group armv8_pmuv3_caps_attr_group = { + .name = "caps", + .attrs = armv8_pmuv3_caps_attrs, +}; + /* * Perf Events' indices */ #define ARMV8_IDX_CYCLE_COUNTER 0 #define ARMV8_IDX_COUNTER0 1 -#define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \ - (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) /* @@ -348,6 +371,73 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event) #define ARMV8_IDX_TO_COUNTER(x) \ (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) +/* + * This code is really good + */ + +#define PMEVN_CASE(n, case_macro) \ + case n: case_macro(n); break + +#define PMEVN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + PMEVN_CASE(0, case_macro); \ + PMEVN_CASE(1, case_macro); \ + PMEVN_CASE(2, case_macro); \ + PMEVN_CASE(3, case_macro); \ + PMEVN_CASE(4, case_macro); \ + PMEVN_CASE(5, case_macro); \ + PMEVN_CASE(6, case_macro); \ + PMEVN_CASE(7, case_macro); \ + PMEVN_CASE(8, case_macro); \ + PMEVN_CASE(9, case_macro); \ + PMEVN_CASE(10, case_macro); \ + PMEVN_CASE(11, case_macro); \ + PMEVN_CASE(12, case_macro); \ + PMEVN_CASE(13, case_macro); \ + PMEVN_CASE(14, case_macro); \ + PMEVN_CASE(15, case_macro); \ + PMEVN_CASE(16, case_macro); \ + PMEVN_CASE(17, case_macro); \ + PMEVN_CASE(18, case_macro); \ + PMEVN_CASE(19, case_macro); \ + PMEVN_CASE(20, case_macro); \ + PMEVN_CASE(21, case_macro); \ + PMEVN_CASE(22, case_macro); \ + PMEVN_CASE(23, case_macro); \ + PMEVN_CASE(24, case_macro); \ + PMEVN_CASE(25, case_macro); \ + PMEVN_CASE(26, case_macro); \ + PMEVN_CASE(27, case_macro); \ + PMEVN_CASE(28, case_macro); \ + PMEVN_CASE(29, case_macro); \ + PMEVN_CASE(30, case_macro); \ + default: WARN(1, "Invalid PMEV* index\n"); \ + } \ + } while (0) + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(pmevcntr##n##_el0) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, pmevcntr##n##_el0) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, pmevtyper##n##_el0) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + static inline u32 armv8pmu_pmcr_read(void) { return read_sysreg(pmcr_el0); @@ -365,28 +455,16 @@ static inline int armv8pmu_has_overflowed(u32 pmovsr) return pmovsr & ARMV8_PMU_OVERFLOWED_MASK; } -static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx) -{ - return idx >= ARMV8_IDX_CYCLE_COUNTER && - idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu); -} - static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) { return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } -static inline void armv8pmu_select_counter(int idx) +static inline u32 armv8pmu_read_evcntr(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); - write_sysreg(counter, pmselr_el0); - isb(); -} -static inline u64 armv8pmu_read_evcntr(int idx) -{ - armv8pmu_select_counter(idx); - return read_sysreg(pmxevcntr_el0); + return read_pmevcntrn(counter); } static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) @@ -440,15 +518,11 @@ static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value) static u64 armv8pmu_read_counter(struct perf_event *event) { - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; u64 value = 0; - if (!armv8pmu_counter_valid(cpu_pmu, idx)) - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) + if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); else value = armv8pmu_read_hw_counter(event); @@ -458,8 +532,9 @@ static u64 armv8pmu_read_counter(struct perf_event *event) static inline void armv8pmu_write_evcntr(int idx, u64 value) { - armv8pmu_select_counter(idx); - write_sysreg(value, pmxevcntr_el0); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + + write_pmevcntrn(counter, value); } static inline void armv8pmu_write_hw_counter(struct perf_event *event, @@ -477,16 +552,12 @@ static inline void armv8pmu_write_hw_counter(struct perf_event *event, static void armv8pmu_write_counter(struct perf_event *event, u64 value) { - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; value = armv8pmu_bias_long_counter(event, value); - if (!armv8pmu_counter_valid(cpu_pmu, idx)) - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) + if (idx == ARMV8_IDX_CYCLE_COUNTER) write_sysreg(value, pmccntr_el0); else armv8pmu_write_hw_counter(event, value); @@ -494,9 +565,10 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value) static inline void armv8pmu_write_evtype(int idx, u32 val) { - armv8pmu_select_counter(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + val &= ARMV8_PMU_EVTYPE_MASK; - write_sysreg(val, pmxevtyper_el0); + write_pmevtypern(counter, val); } static inline void armv8pmu_write_event_type(struct perf_event *event) @@ -516,7 +588,10 @@ static inline void armv8pmu_write_event_type(struct perf_event *event) armv8pmu_write_evtype(idx - 1, hwc->config_base); armv8pmu_write_evtype(idx, chain_evt); } else { - armv8pmu_write_evtype(idx, hwc->config_base); + if (idx == ARMV8_IDX_CYCLE_COUNTER) + write_sysreg(hwc->config_base, pmccfiltr_el0); + else + armv8pmu_write_evtype(idx, hwc->config_base); } } @@ -532,6 +607,11 @@ static u32 armv8pmu_event_cnten_mask(struct perf_event *event) static inline void armv8pmu_enable_counter(u32 mask) { + /* + * Make sure event configuration register writes are visible before we + * enable the counter. + * */ + isb(); write_sysreg(mask, pmcntenset_el0); } @@ -550,6 +630,11 @@ static inline void armv8pmu_enable_event_counter(struct perf_event *event) static inline void armv8pmu_disable_counter(u32 mask) { write_sysreg(mask, pmcntenclr_el0); + /* + * Make sure the effects of disabling the counter are visible before we + * start configuring the event. + */ + isb(); } static inline void armv8pmu_disable_event_counter(struct perf_event *event) @@ -606,15 +691,10 @@ static inline u32 armv8pmu_getreset_flags(void) static void armv8pmu_enable_event(struct perf_event *event) { - unsigned long flags; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - /* * Enable counter and interrupt, and set the counter to count * the event that we're interested in. */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* * Disable counter @@ -622,7 +702,7 @@ static void armv8pmu_enable_event(struct perf_event *event) armv8pmu_disable_event_counter(event); /* - * Set event (if destined for PMNx counters). + * Set event. */ armv8pmu_write_event_type(event); @@ -635,21 +715,10 @@ static void armv8pmu_enable_event(struct perf_event *event) * Enable counter */ armv8pmu_enable_event_counter(event); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_disable_event(struct perf_event *event) { - unsigned long flags; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - /* - * Disable counter and interrupt - */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* * Disable counter */ @@ -659,30 +728,18 @@ static void armv8pmu_disable_event(struct perf_event *event) * Disable interrupt for this counter */ armv8pmu_disable_event_irq(event); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_start(struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Enable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_stop(struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Disable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) @@ -735,20 +792,16 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; + /* + * Perf event overflow will queue the processing of the event as + * an irq_work which will be taken care of in the handling of + * IPI_IRQ_WORK. + */ if (perf_event_overflow(event, &data, regs)) cpu_pmu->disable(event); } armv8pmu_start(cpu_pmu); - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - return IRQ_HANDLED; } @@ -997,6 +1050,12 @@ static void __armv8pmu_probe_pmu(void *info) bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap, pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); + + /* store PMMIR_EL1 register for sysfs */ + if (pmuver >= ID_AA64DFR0_PMUVER_8_4 && (pmceid_raw[1] & BIT(31))) + cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1); + else + cpu_pmu->reg_pmmir = 0; } static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) @@ -1019,7 +1078,8 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, int (*map_event)(struct perf_event *event), const struct attribute_group *events, - const struct attribute_group *format) + const struct attribute_group *format, + const struct attribute_group *caps) { int ret = armv8pmu_probe_pmu(cpu_pmu); if (ret) @@ -1044,104 +1104,112 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, events : &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ? format : &armv8_pmuv3_format_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ? + caps : &armv8_pmuv3_caps_attr_group; return 0; } +static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name, + int (*map_event)(struct perf_event *event)) +{ + return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL); +} + static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_pmuv3", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_pmuv3", + armv8_pmuv3_map_event); } static int armv8_a34_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a34", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a34", + armv8_pmuv3_map_event); } static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", - armv8_a53_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35", + armv8_a53_map_event); } static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53", - armv8_a53_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53", + armv8_a53_map_event); } static int armv8_a55_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a55", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a55", + armv8_pmuv3_map_event); } static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", - armv8_a57_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57", + armv8_a57_map_event); } static int armv8_a65_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a65", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a65", + armv8_pmuv3_map_event); } static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", - armv8_a57_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72", + armv8_a57_map_event); } static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73", - armv8_a73_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73", + armv8_a73_map_event); } static int armv8_a75_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a75", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a75", + armv8_pmuv3_map_event); } static int armv8_a76_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a76", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a76", + armv8_pmuv3_map_event); } static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a77", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a77", + armv8_pmuv3_map_event); } static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_neoverse_e1", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_e1", + armv8_pmuv3_map_event); } static int armv8_n1_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_neoverse_n1", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_n1", + armv8_pmuv3_map_event); } static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", - armv8_thunder_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder", + armv8_thunder_map_event); } static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan", - armv8_vulcan_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan", + armv8_vulcan_map_event); } static const struct of_device_id armv8_pmu_of_device_ids[] = { diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 666b225aeb3a..94e8718e7229 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -16,7 +16,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) /* * Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but - * we're stuck with it for ABI compatability reasons. + * we're stuck with it for ABI compatibility reasons. * * For a 32-bit consumer inspecting a 32-bit task, then it will look at * the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h). diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index 263d5fba4c8a..104101f633b1 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -29,7 +29,8 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn) aarch64_insn_is_msr_imm(insn) || aarch64_insn_is_msr_reg(insn) || aarch64_insn_is_exception(insn) || - aarch64_insn_is_eret(insn)) + aarch64_insn_is_eret(insn) || + aarch64_insn_is_eret_auth(insn)) return false; /* @@ -42,8 +43,10 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn) != AARCH64_INSN_SPCLREG_DAIF; /* - * The HINT instruction is is problematic when single-stepping, - * except for the NOP case. + * The HINT instruction is steppable only if it is in whitelist + * and the rest of other such instructions are blocked for + * single stepping as they may cause exception or other + * unintended behaviour. */ if (aarch64_insn_is_hint(insn)) return aarch64_insn_is_steppable_hint(insn); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 84ec630b8ab5..4784011cecac 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -21,6 +21,7 @@ #include <linux/lockdep.h> #include <linux/mman.h> #include <linux/mm.h> +#include <linux/nospec.h> #include <linux/stddef.h> #include <linux/sysctl.h> #include <linux/unistd.h> @@ -52,6 +53,7 @@ #include <asm/exec.h> #include <asm/fpsimd.h> #include <asm/mmu_context.h> +#include <asm/mte.h> #include <asm/processor.h> #include <asm/pointer_auth.h> #include <asm/stacktrace.h> @@ -123,10 +125,8 @@ void arch_cpu_idle(void) * This should do all the clock switching and wait for interrupt * tricks */ - trace_cpu_idle_rcuidle(1, smp_processor_id()); cpu_do_idle(); local_irq_enable(); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } #ifdef CONFIG_HOTPLUG_CPU @@ -241,7 +241,7 @@ static void print_pstate(struct pt_regs *regs) const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >> PSR_BTYPE_SHIFT]; - printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO BTYPE=%s)\n", + printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO BTYPE=%s)\n", pstate, pstate & PSR_N_BIT ? 'N' : 'n', pstate & PSR_Z_BIT ? 'Z' : 'z', @@ -253,6 +253,7 @@ static void print_pstate(struct pt_regs *regs) pstate & PSR_F_BIT ? 'F' : 'f', pstate & PSR_PAN_BIT ? '+' : '-', pstate & PSR_UAO_BIT ? '+' : '-', + pstate & PSR_TCO_BIT ? '+' : '-', btype_str); } } @@ -338,6 +339,7 @@ void flush_thread(void) tls_thread_flush(); flush_ptrace_hw_breakpoint(current); flush_tagged_addr_state(); + flush_mte_state(); } void release_thread(struct task_struct *dead_task) @@ -370,6 +372,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) dst->thread.sve_state = NULL; clear_tsk_thread_flag(dst, TIF_SVE); + /* clear any pending asynchronous tag fault raised by the parent */ + clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); + return 0; } @@ -423,8 +428,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, cpus_have_const_cap(ARM64_HAS_UAO)) childregs->pstate |= PSR_UAO_BIT; - if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) - set_ssbs_bit(childregs); + spectre_v4_enable_task_mitigation(p); if (system_uses_irq_prio_masking()) childregs->pmr_save = GIC_PRIO_IRQON; @@ -474,8 +478,6 @@ void uao_thread_switch(struct task_struct *next) */ static void ssbs_thread_switch(struct task_struct *next) { - struct pt_regs *regs = task_pt_regs(next); - /* * Nothing to do for kernel threads, but 'regs' may be junk * (e.g. idle task) so check the flags and bail early. @@ -487,18 +489,10 @@ static void ssbs_thread_switch(struct task_struct *next) * If all CPUs implement the SSBS extension, then we just need to * context-switch the PSTATE field. */ - if (cpu_have_feature(cpu_feature(SSBS))) + if (cpus_have_const_cap(ARM64_SSBS)) return; - /* If the mitigation is enabled, then we leave SSBS clear. */ - if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) || - test_tsk_thread_flag(next, TIF_SSBD)) - return; - - if (compat_user_mode(regs)) - set_compat_ssbs_bit(regs); - else if (user_mode(regs)) - set_ssbs_bit(regs); + spectre_v4_enable_task_mitigation(next); } /* @@ -516,6 +510,39 @@ static void entry_task_switch(struct task_struct *next) } /* + * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT. + * Assuming the virtual counter is enabled at the beginning of times: + * + * - disable access when switching from a 64bit task to a 32bit task + * - enable access when switching from a 32bit task to a 64bit task + */ +static void erratum_1418040_thread_switch(struct task_struct *prev, + struct task_struct *next) +{ + bool prev32, next32; + u64 val; + + if (!(IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) && + cpus_have_const_cap(ARM64_WORKAROUND_1418040))) + return; + + prev32 = is_compat_thread(task_thread_info(prev)); + next32 = is_compat_thread(task_thread_info(next)); + + if (prev32 == next32) + return; + + val = read_sysreg(cntkctl_el1); + + if (!next32) + val |= ARCH_TIMER_USR_VCT_ACCESS_EN; + else + val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN; + + write_sysreg(val, cntkctl_el1); +} + +/* * Thread switching. */ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, @@ -530,6 +557,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, entry_task_switch(next); uao_thread_switch(next); ssbs_thread_switch(next); + erratum_1418040_thread_switch(prev, next); /* * Complete any pending TLB or cache maintenance on this CPU in case @@ -539,6 +567,13 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, */ dsb(ish); + /* + * MTE thread switching must happen after the DSB above to ensure that + * any asynchronous tag check faults have been logged in the TFSR*_EL1 + * registers. + */ + mte_thread_switch(next); + /* the actual thread switch */ last = cpu_switch_to(prev, next); @@ -588,6 +623,11 @@ void arch_setup_new_exec(void) current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0; ptrauth_thread_init_user(current); + + if (task_spec_ssb_noexec(current)) { + arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS, + PR_SPEC_ENABLE); + } } #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI @@ -596,11 +636,18 @@ void arch_setup_new_exec(void) */ static unsigned int tagged_addr_disabled; -long set_tagged_addr_ctrl(unsigned long arg) +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) { - if (is_compat_task()) + unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE; + struct thread_info *ti = task_thread_info(task); + + if (is_compat_thread(ti)) return -EINVAL; - if (arg & ~PR_TAGGED_ADDR_ENABLE) + + if (system_supports_mte()) + valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK; + + if (arg & ~valid_mask) return -EINVAL; /* @@ -610,20 +657,28 @@ long set_tagged_addr_ctrl(unsigned long arg) if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled) return -EINVAL; - update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE); + if (set_mte_ctrl(task, arg) != 0) + return -EINVAL; + + update_ti_thread_flag(ti, TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE); return 0; } -long get_tagged_addr_ctrl(void) +long get_tagged_addr_ctrl(struct task_struct *task) { - if (is_compat_task()) + long ret = 0; + struct thread_info *ti = task_thread_info(task); + + if (is_compat_thread(ti)) return -EINVAL; - if (test_thread_flag(TIF_TAGGED_ADDR)) - return PR_TAGGED_ADDR_ENABLE; + if (test_ti_thread_flag(ti, TIF_TAGGED_ADDR)) + ret = PR_TAGGED_ADDR_ENABLE; - return 0; + ret |= get_mte_ctrl(task); + + return ret; } /* diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c new file mode 100644 index 000000000000..68b710f1b43f --- /dev/null +++ b/arch/arm64/kernel/proton-pack.c @@ -0,0 +1,792 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Handle detection, reporting and mitigation of Spectre v1, v2 and v4, as + * detailed at: + * + * https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability + * + * This code was originally written hastily under an awful lot of stress and so + * aspects of it are somewhat hacky. Unfortunately, changing anything in here + * instantly makes me feel ill. Thanks, Jann. Thann. + * + * Copyright (C) 2018 ARM Ltd, All Rights Reserved. + * Copyright (C) 2020 Google LLC + * + * "If there's something strange in your neighbourhood, who you gonna call?" + * + * Authors: Will Deacon <will@kernel.org> and Marc Zyngier <maz@kernel.org> + */ + +#include <linux/arm-smccc.h> +#include <linux/cpu.h> +#include <linux/device.h> +#include <linux/nospec.h> +#include <linux/prctl.h> +#include <linux/sched/task_stack.h> + +#include <asm/spectre.h> +#include <asm/traps.h> + +/* + * We try to ensure that the mitigation state can never change as the result of + * onlining a late CPU. + */ +static void update_mitigation_state(enum mitigation_state *oldp, + enum mitigation_state new) +{ + enum mitigation_state state; + + do { + state = READ_ONCE(*oldp); + if (new <= state) + break; + + /* Userspace almost certainly can't deal with this. */ + if (WARN_ON(system_capabilities_finalized())) + break; + } while (cmpxchg_relaxed(oldp, state, new) != state); +} + +/* + * Spectre v1. + * + * The kernel can't protect userspace for this one: it's each person for + * themselves. Advertise what we're doing and be done with it. + */ +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +/* + * Spectre v2. + * + * This one sucks. A CPU is either: + * + * - Mitigated in hardware and advertised by ID_AA64PFR0_EL1.CSV2. + * - Mitigated in hardware and listed in our "safe list". + * - Mitigated in software by firmware. + * - Mitigated in software by a CPU-specific dance in the kernel. + * - Vulnerable. + * + * It's not unlikely for different CPUs in a big.LITTLE system to fall into + * different camps. + */ +static enum mitigation_state spectre_v2_state; + +static bool __read_mostly __nospectre_v2; +static int __init parse_spectre_v2_param(char *str) +{ + __nospectre_v2 = true; + return 0; +} +early_param("nospectre_v2", parse_spectre_v2_param); + +static bool spectre_v2_mitigations_off(void) +{ + bool ret = __nospectre_v2 || cpu_mitigations_off(); + + if (ret) + pr_info_once("spectre-v2 mitigation disabled by command line option\n"); + + return ret; +} + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, + char *buf) +{ + switch (spectre_v2_state) { + case SPECTRE_UNAFFECTED: + return sprintf(buf, "Not affected\n"); + case SPECTRE_MITIGATED: + return sprintf(buf, "Mitigation: Branch predictor hardening\n"); + case SPECTRE_VULNERABLE: + fallthrough; + default: + return sprintf(buf, "Vulnerable\n"); + } +} + +static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void) +{ + u64 pfr0; + static const struct midr_range spectre_v2_safe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), + MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), + { /* sentinel */ } + }; + + /* If the CPU has CSV2 set, we're safe */ + pfr0 = read_cpuid(ID_AA64PFR0_EL1); + if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT)) + return SPECTRE_UNAFFECTED; + + /* Alternatively, we have a list of unaffected CPUs */ + if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list)) + return SPECTRE_UNAFFECTED; + + return SPECTRE_VULNERABLE; +} + +#define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED (1) + +static enum mitigation_state spectre_v2_get_cpu_fw_mitigation_state(void) +{ + int ret; + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + + ret = res.a0; + switch (ret) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + return SPECTRE_UNAFFECTED; + default: + fallthrough; + case SMCCC_RET_NOT_SUPPORTED: + return SPECTRE_VULNERABLE; + } +} + +bool has_spectre_v2(const struct arm64_cpu_capabilities *entry, int scope) +{ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + if (spectre_v2_get_cpu_hw_mitigation_state() == SPECTRE_UNAFFECTED) + return false; + + if (spectre_v2_get_cpu_fw_mitigation_state() == SPECTRE_UNAFFECTED) + return false; + + return true; +} + +DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); + +enum mitigation_state arm64_get_spectre_v2_state(void) +{ + return spectre_v2_state; +} + +#ifdef CONFIG_KVM +#include <asm/cacheflush.h> +#include <asm/kvm_asm.h> + +atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); + +static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, + const char *hyp_vecs_end) +{ + void *dst = lm_alias(__bp_harden_hyp_vecs + slot * SZ_2K); + int i; + + for (i = 0; i < SZ_2K; i += 0x80) + memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); + + __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); +} + +static void install_bp_hardening_cb(bp_hardening_cb_t fn) +{ + static DEFINE_RAW_SPINLOCK(bp_lock); + int cpu, slot = -1; + const char *hyp_vecs_start = __smccc_workaround_1_smc; + const char *hyp_vecs_end = __smccc_workaround_1_smc + + __SMCCC_WORKAROUND_1_SMC_SZ; + + /* + * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if + * we're a guest. Skip the hyp-vectors work. + */ + if (!is_hyp_mode_available()) { + __this_cpu_write(bp_hardening_data.fn, fn); + return; + } + + raw_spin_lock(&bp_lock); + for_each_possible_cpu(cpu) { + if (per_cpu(bp_hardening_data.fn, cpu) == fn) { + slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); + break; + } + } + + if (slot == -1) { + slot = atomic_inc_return(&arm64_el2_vector_last_slot); + BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); + __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); + } + + __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); + __this_cpu_write(bp_hardening_data.fn, fn); + raw_spin_unlock(&bp_lock); +} +#else +static void install_bp_hardening_cb(bp_hardening_cb_t fn) +{ + __this_cpu_write(bp_hardening_data.fn, fn); +} +#endif /* CONFIG_KVM */ + +static void call_smc_arch_workaround_1(void) +{ + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); +} + +static void call_hvc_arch_workaround_1(void) +{ + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); +} + +static void qcom_link_stack_sanitisation(void) +{ + u64 tmp; + + asm volatile("mov %0, x30 \n" + ".rept 16 \n" + "bl . + 4 \n" + ".endr \n" + "mov x30, %0 \n" + : "=&r" (tmp)); +} + +static enum mitigation_state spectre_v2_enable_fw_mitigation(void) +{ + bp_hardening_cb_t cb; + enum mitigation_state state; + + state = spectre_v2_get_cpu_fw_mitigation_state(); + if (state != SPECTRE_MITIGATED) + return state; + + if (spectre_v2_mitigations_off()) + return SPECTRE_VULNERABLE; + + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: + cb = call_hvc_arch_workaround_1; + break; + + case SMCCC_CONDUIT_SMC: + cb = call_smc_arch_workaround_1; + break; + + default: + return SPECTRE_VULNERABLE; + } + + install_bp_hardening_cb(cb); + return SPECTRE_MITIGATED; +} + +static enum mitigation_state spectre_v2_enable_sw_mitigation(void) +{ + u32 midr; + + if (spectre_v2_mitigations_off()) + return SPECTRE_VULNERABLE; + + midr = read_cpuid_id(); + if (((midr & MIDR_CPU_MODEL_MASK) != MIDR_QCOM_FALKOR) && + ((midr & MIDR_CPU_MODEL_MASK) != MIDR_QCOM_FALKOR_V1)) + return SPECTRE_VULNERABLE; + + install_bp_hardening_cb(qcom_link_stack_sanitisation); + return SPECTRE_MITIGATED; +} + +void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused) +{ + enum mitigation_state state; + + WARN_ON(preemptible()); + + state = spectre_v2_get_cpu_hw_mitigation_state(); + if (state == SPECTRE_VULNERABLE) + state = spectre_v2_enable_fw_mitigation(); + if (state == SPECTRE_VULNERABLE) + state = spectre_v2_enable_sw_mitigation(); + + update_mitigation_state(&spectre_v2_state, state); +} + +/* + * Spectre v4. + * + * If you thought Spectre v2 was nasty, wait until you see this mess. A CPU is + * either: + * + * - Mitigated in hardware and listed in our "safe list". + * - Mitigated in hardware via PSTATE.SSBS. + * - Mitigated in software by firmware (sometimes referred to as SSBD). + * + * Wait, that doesn't sound so bad, does it? Keep reading... + * + * A major source of headaches is that the software mitigation is enabled both + * on a per-task basis, but can also be forced on for the kernel, necessitating + * both context-switch *and* entry/exit hooks. To make it even worse, some CPUs + * allow EL0 to toggle SSBS directly, which can end up with the prctl() state + * being stale when re-entering the kernel. The usual big.LITTLE caveats apply, + * so you can have systems that have both firmware and SSBS mitigations. This + * means we actually have to reject late onlining of CPUs with mitigations if + * all of the currently onlined CPUs are safelisted, as the mitigation tends to + * be opt-in for userspace. Yes, really, the cure is worse than the disease. + * + * The only good part is that if the firmware mitigation is present, then it is + * present for all CPUs, meaning we don't have to worry about late onlining of a + * vulnerable CPU if one of the boot CPUs is using the firmware mitigation. + * + * Give me a VAX-11/780 any day of the week... + */ +static enum mitigation_state spectre_v4_state; + +/* This is the per-cpu state tracking whether we need to talk to firmware */ +DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); + +enum spectre_v4_policy { + SPECTRE_V4_POLICY_MITIGATION_DYNAMIC, + SPECTRE_V4_POLICY_MITIGATION_ENABLED, + SPECTRE_V4_POLICY_MITIGATION_DISABLED, +}; + +static enum spectre_v4_policy __read_mostly __spectre_v4_policy; + +static const struct spectre_v4_param { + const char *str; + enum spectre_v4_policy policy; +} spectre_v4_params[] = { + { "force-on", SPECTRE_V4_POLICY_MITIGATION_ENABLED, }, + { "force-off", SPECTRE_V4_POLICY_MITIGATION_DISABLED, }, + { "kernel", SPECTRE_V4_POLICY_MITIGATION_DYNAMIC, }, +}; +static int __init parse_spectre_v4_param(char *str) +{ + int i; + + if (!str || !str[0]) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(spectre_v4_params); i++) { + const struct spectre_v4_param *param = &spectre_v4_params[i]; + + if (strncmp(str, param->str, strlen(param->str))) + continue; + + __spectre_v4_policy = param->policy; + return 0; + } + + return -EINVAL; +} +early_param("ssbd", parse_spectre_v4_param); + +/* + * Because this was all written in a rush by people working in different silos, + * we've ended up with multiple command line options to control the same thing. + * Wrap these up in some helpers, which prefer disabling the mitigation if faced + * with contradictory parameters. The mitigation is always either "off", + * "dynamic" or "on". + */ +static bool spectre_v4_mitigations_off(void) +{ + bool ret = cpu_mitigations_off() || + __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; + + if (ret) + pr_info_once("spectre-v4 mitigation disabled by command-line option\n"); + + return ret; +} + +/* Do we need to toggle the mitigation state on entry to/exit from the kernel? */ +static bool spectre_v4_mitigations_dynamic(void) +{ + return !spectre_v4_mitigations_off() && + __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DYNAMIC; +} + +static bool spectre_v4_mitigations_on(void) +{ + return !spectre_v4_mitigations_off() && + __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_ENABLED; +} + +ssize_t cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf) +{ + switch (spectre_v4_state) { + case SPECTRE_UNAFFECTED: + return sprintf(buf, "Not affected\n"); + case SPECTRE_MITIGATED: + return sprintf(buf, "Mitigation: Speculative Store Bypass disabled via prctl\n"); + case SPECTRE_VULNERABLE: + fallthrough; + default: + return sprintf(buf, "Vulnerable\n"); + } +} + +enum mitigation_state arm64_get_spectre_v4_state(void) +{ + return spectre_v4_state; +} + +static enum mitigation_state spectre_v4_get_cpu_hw_mitigation_state(void) +{ + static const struct midr_range spectre_v4_safe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), + { /* sentinel */ }, + }; + + if (is_midr_in_range_list(read_cpuid_id(), spectre_v4_safe_list)) + return SPECTRE_UNAFFECTED; + + /* CPU features are detected first */ + if (this_cpu_has_cap(ARM64_SSBS)) + return SPECTRE_MITIGATED; + + return SPECTRE_VULNERABLE; +} + +static enum mitigation_state spectre_v4_get_cpu_fw_mitigation_state(void) +{ + int ret; + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_2, &res); + + ret = res.a0; + switch (ret) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + fallthrough; + case SMCCC_RET_NOT_REQUIRED: + return SPECTRE_UNAFFECTED; + default: + fallthrough; + case SMCCC_RET_NOT_SUPPORTED: + return SPECTRE_VULNERABLE; + } +} + +bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope) +{ + enum mitigation_state state; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + state = spectre_v4_get_cpu_hw_mitigation_state(); + if (state == SPECTRE_VULNERABLE) + state = spectre_v4_get_cpu_fw_mitigation_state(); + + return state != SPECTRE_UNAFFECTED; +} + +static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) +{ + if (user_mode(regs)) + return 1; + + if (instr & BIT(PSTATE_Imm_shift)) + regs->pstate |= PSR_SSBS_BIT; + else + regs->pstate &= ~PSR_SSBS_BIT; + + arm64_skip_faulting_instruction(regs, 4); + return 0; +} + +static struct undef_hook ssbs_emulation_hook = { + .instr_mask = ~(1U << PSTATE_Imm_shift), + .instr_val = 0xd500401f | PSTATE_SSBS, + .fn = ssbs_emulation_handler, +}; + +static enum mitigation_state spectre_v4_enable_hw_mitigation(void) +{ + static bool undef_hook_registered = false; + static DEFINE_RAW_SPINLOCK(hook_lock); + enum mitigation_state state; + + /* + * If the system is mitigated but this CPU doesn't have SSBS, then + * we must be on the safelist and there's nothing more to do. + */ + state = spectre_v4_get_cpu_hw_mitigation_state(); + if (state != SPECTRE_MITIGATED || !this_cpu_has_cap(ARM64_SSBS)) + return state; + + raw_spin_lock(&hook_lock); + if (!undef_hook_registered) { + register_undef_hook(&ssbs_emulation_hook); + undef_hook_registered = true; + } + raw_spin_unlock(&hook_lock); + + if (spectre_v4_mitigations_off()) { + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); + asm volatile(SET_PSTATE_SSBS(1)); + return SPECTRE_VULNERABLE; + } + + /* SCTLR_EL1.DSSBS was initialised to 0 during boot */ + asm volatile(SET_PSTATE_SSBS(0)); + return SPECTRE_MITIGATED; +} + +/* + * Patch a branch over the Spectre-v4 mitigation code with a NOP so that + * we fallthrough and check whether firmware needs to be called on this CPU. + */ +void __init spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); /* Branch -> NOP */ + + if (spectre_v4_mitigations_off()) + return; + + if (cpus_have_final_cap(ARM64_SSBS)) + return; + + if (spectre_v4_mitigations_dynamic()) + *updptr = cpu_to_le32(aarch64_insn_gen_nop()); +} + +/* + * Patch a NOP in the Spectre-v4 mitigation code with an SMC/HVC instruction + * to call into firmware to adjust the mitigation state. + */ +void __init spectre_v4_patch_fw_mitigation_conduit(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + u32 insn; + + BUG_ON(nr_inst != 1); /* NOP -> HVC/SMC */ + + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: + insn = aarch64_insn_get_hvc_value(); + break; + case SMCCC_CONDUIT_SMC: + insn = aarch64_insn_get_smc_value(); + break; + default: + return; + } + + *updptr = cpu_to_le32(insn); +} + +static enum mitigation_state spectre_v4_enable_fw_mitigation(void) +{ + enum mitigation_state state; + + state = spectre_v4_get_cpu_fw_mitigation_state(); + if (state != SPECTRE_MITIGATED) + return state; + + if (spectre_v4_mitigations_off()) { + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, false, NULL); + return SPECTRE_VULNERABLE; + } + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, true, NULL); + + if (spectre_v4_mitigations_dynamic()) + __this_cpu_write(arm64_ssbd_callback_required, 1); + + return SPECTRE_MITIGATED; +} + +void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused) +{ + enum mitigation_state state; + + WARN_ON(preemptible()); + + state = spectre_v4_enable_hw_mitigation(); + if (state == SPECTRE_VULNERABLE) + state = spectre_v4_enable_fw_mitigation(); + + update_mitigation_state(&spectre_v4_state, state); +} + +static void __update_pstate_ssbs(struct pt_regs *regs, bool state) +{ + u64 bit = compat_user_mode(regs) ? PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + if (state) + regs->pstate |= bit; + else + regs->pstate &= ~bit; +} + +void spectre_v4_enable_task_mitigation(struct task_struct *tsk) +{ + struct pt_regs *regs = task_pt_regs(tsk); + bool ssbs = false, kthread = tsk->flags & PF_KTHREAD; + + if (spectre_v4_mitigations_off()) + ssbs = true; + else if (spectre_v4_mitigations_dynamic() && !kthread) + ssbs = !test_tsk_thread_flag(tsk, TIF_SSBD); + + __update_pstate_ssbs(regs, ssbs); +} + +/* + * The Spectre-v4 mitigation can be controlled via a prctl() from userspace. + * This is interesting because the "speculation disabled" behaviour can be + * configured so that it is preserved across exec(), which means that the + * prctl() may be necessary even when PSTATE.SSBS can be toggled directly + * from userspace. + */ +static void ssbd_prctl_enable_mitigation(struct task_struct *task) +{ + task_clear_spec_ssb_noexec(task); + task_set_spec_ssb_disable(task); + set_tsk_thread_flag(task, TIF_SSBD); +} + +static void ssbd_prctl_disable_mitigation(struct task_struct *task) +{ + task_clear_spec_ssb_noexec(task); + task_clear_spec_ssb_disable(task); + clear_tsk_thread_flag(task, TIF_SSBD); +} + +static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + switch (ctrl) { + case PR_SPEC_ENABLE: + /* Enable speculation: disable mitigation */ + /* + * Force disabled speculation prevents it from being + * re-enabled. + */ + if (task_spec_ssb_force_disable(task)) + return -EPERM; + + /* + * If the mitigation is forced on, then speculation is forced + * off and we again prevent it from being re-enabled. + */ + if (spectre_v4_mitigations_on()) + return -EPERM; + + ssbd_prctl_disable_mitigation(task); + break; + case PR_SPEC_FORCE_DISABLE: + /* Force disable speculation: force enable mitigation */ + /* + * If the mitigation is forced off, then speculation is forced + * on and we prevent it from being disabled. + */ + if (spectre_v4_mitigations_off()) + return -EPERM; + + task_set_spec_ssb_force_disable(task); + fallthrough; + case PR_SPEC_DISABLE: + /* Disable speculation: enable mitigation */ + /* Same as PR_SPEC_FORCE_DISABLE */ + if (spectre_v4_mitigations_off()) + return -EPERM; + + ssbd_prctl_enable_mitigation(task); + break; + case PR_SPEC_DISABLE_NOEXEC: + /* Disable speculation until execve(): enable mitigation */ + /* + * If the mitigation state is forced one way or the other, then + * we must fail now before we try to toggle it on execve(). + */ + if (task_spec_ssb_force_disable(task) || + spectre_v4_mitigations_off() || + spectre_v4_mitigations_on()) { + return -EPERM; + } + + ssbd_prctl_enable_mitigation(task); + task_set_spec_ssb_noexec(task); + break; + default: + return -ERANGE; + } + + spectre_v4_enable_task_mitigation(task); + return 0; +} + +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssbd_prctl_set(task, ctrl); + default: + return -ENODEV; + } +} + +static int ssbd_prctl_get(struct task_struct *task) +{ + switch (spectre_v4_state) { + case SPECTRE_UNAFFECTED: + return PR_SPEC_NOT_AFFECTED; + case SPECTRE_MITIGATED: + if (spectre_v4_mitigations_on()) + return PR_SPEC_NOT_AFFECTED; + + if (spectre_v4_mitigations_dynamic()) + break; + + /* Mitigations are disabled, so we're vulnerable. */ + fallthrough; + case SPECTRE_VULNERABLE: + fallthrough; + default: + return PR_SPEC_ENABLE; + } + + /* Check the mitigation state for this task */ + if (task_spec_ssb_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + + if (task_spec_ssb_noexec(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE_NOEXEC; + + if (task_spec_ssb_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; +} + +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssbd_prctl_get(task); + default: + return -ENODEV; + } +} diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index d8ebfd813e28..f49b349e16a3 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -34,6 +34,7 @@ #include <asm/cpufeature.h> #include <asm/debug-monitors.h> #include <asm/fpsimd.h> +#include <asm/mte.h> #include <asm/pointer_auth.h> #include <asm/stacktrace.h> #include <asm/syscall.h> @@ -1032,6 +1033,35 @@ static int pac_generic_keys_set(struct task_struct *target, #endif /* CONFIG_CHECKPOINT_RESTORE */ #endif /* CONFIG_ARM64_PTR_AUTH */ +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI +static int tagged_addr_ctrl_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + long ctrl = get_tagged_addr_ctrl(target); + + if (IS_ERR_VALUE(ctrl)) + return ctrl; + + return membuf_write(&to, &ctrl, sizeof(ctrl)); +} + +static int tagged_addr_ctrl_set(struct task_struct *target, const struct + user_regset *regset, unsigned int pos, + unsigned int count, const void *kbuf, const + void __user *ubuf) +{ + int ret; + long ctrl; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); + if (ret) + return ret; + + return set_tagged_addr_ctrl(target, ctrl); +} +#endif + enum aarch64_regset { REGSET_GPR, REGSET_FPR, @@ -1051,6 +1081,9 @@ enum aarch64_regset { REGSET_PACG_KEYS, #endif #endif +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI + REGSET_TAGGED_ADDR_CTRL, +#endif }; static const struct user_regset aarch64_regsets[] = { @@ -1148,6 +1181,16 @@ static const struct user_regset aarch64_regsets[] = { }, #endif #endif +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI + [REGSET_TAGGED_ADDR_CTRL] = { + .core_note_type = NT_ARM_TAGGED_ADDR_CTRL, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .regset_get = tagged_addr_ctrl_get, + .set = tagged_addr_ctrl_set, + }, +#endif }; static const struct user_regset_view user_aarch64_view = { @@ -1691,6 +1734,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { + switch (request) { + case PTRACE_PEEKMTETAGS: + case PTRACE_POKEMTETAGS: + return mte_ptrace_copy_tags(child, request, addr, data); + } + return ptrace_request(child, request, addr, data); } @@ -1793,7 +1842,7 @@ void syscall_trace_exit(struct pt_regs *regs) * We also reserve IL for the kernel; SS is handled dynamically. */ #define SPSR_EL1_AARCH64_RES0_BITS \ - (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ + (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 26) | GENMASK_ULL(23, 22) | \ GENMASK_ULL(20, 13) | GENMASK_ULL(5, 5)) #define SPSR_EL1_AARCH32_RES0_BITS \ (GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20)) diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 542d6edc6806..84eec95ec06c 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -36,18 +36,6 @@ SYM_CODE_START(arm64_relocate_new_kernel) mov x14, xzr /* x14 = entry ptr */ mov x13, xzr /* x13 = copy dest */ - /* Clear the sctlr_el2 flags. */ - mrs x0, CurrentEL - cmp x0, #CurrentEL_EL2 - b.ne 1f - mrs x0, sctlr_el2 - mov_q x1, SCTLR_ELx_FLAGS - bic x0, x0, x1 - pre_disable_mmu_workaround - msr sctlr_el2, x0 - isb -1: - /* Check if the new image needs relocation. */ tbnz x16, IND_DONE_BIT, .Ldone diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index a5e8b3b9d798..a6d18755652f 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -18,16 +18,16 @@ struct return_address_data { void *addr; }; -static int save_return_addr(struct stackframe *frame, void *d) +static bool save_return_addr(void *d, unsigned long pc) { struct return_address_data *data = d; if (!data->level) { - data->addr = (void *)frame->pc; - return 1; + data->addr = (void *)pc; + return false; } else { --data->level; - return 0; + return true; } } NOKPROBE_SYMBOL(save_return_addr); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 77c4c9bad1b8..53acbeca4f57 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -280,7 +280,6 @@ u64 cpu_logical_map(int cpu) { return __cpu_logical_map[cpu]; } -EXPORT_SYMBOL_GPL(cpu_logical_map); void __init __no_sanitize_address setup_arch(char **cmdline_p) { diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 3b4f31f35e45..bdcaaf091e1e 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -244,7 +244,8 @@ static int preserve_sve_context(struct sve_context __user *ctx) if (vq) { /* * This assumes that the SVE state has already been saved to - * the task struct by calling preserve_fpsimd_context(). + * the task struct by calling the function + * fpsimd_signal_preserve_current_state(). */ err |= __copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET, current->thread.sve_state, @@ -748,6 +749,9 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, regs->pstate |= PSR_BTYPE_C; } + /* TCO (Tag Check Override) always cleared for signal handlers */ + regs->pstate &= ~PSR_TCO_BIT; + if (ka->sa.sa_flags & SA_RESTORER) sigtramp = ka->sa.sa_restorer; else @@ -932,6 +936,12 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, if (thread_flags & _TIF_UPROBE) uprobe_notify_resume(regs); + if (thread_flags & _TIF_MTE_ASYNC_FAULT) { + clear_thread_flag(TIF_MTE_ASYNC_FAULT); + send_sig_fault(SIGSEGV, SEGV_MTEAERR, + (void __user *)NULL, current); + } + if (thread_flags & _TIF_SIGPENDING) do_signal(regs); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 03957a1ae6c0..355ee9eed4dd 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -151,7 +151,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) break; } pr_crit("CPU%u: may not have shut down cleanly\n", cpu); - /* Fall through */ + fallthrough; case CPU_STUCK_IN_KERNEL: pr_crit("CPU%u: is stuck in kernel\n", cpu); if (status & CPU_STUCK_REASON_52_BIT_VA) diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index c8a3fee00c11..5892e79fa429 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -83,9 +83,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) /* * We write the release address as LE regardless of the native - * endianess of the kernel. Therefore, any boot-loaders that + * endianness of the kernel. Therefore, any boot-loaders that * read this address need to convert this address to the - * boot-loader's endianess before jumping. This is mandated by + * boot-loader's endianness before jumping. This is mandated by * the boot protocol. */ writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr); diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c deleted file mode 100644 index b26955f56750..000000000000 --- a/arch/arm64/kernel/ssbd.c +++ /dev/null @@ -1,129 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2018 ARM Ltd, All Rights Reserved. - */ - -#include <linux/compat.h> -#include <linux/errno.h> -#include <linux/prctl.h> -#include <linux/sched.h> -#include <linux/sched/task_stack.h> -#include <linux/thread_info.h> - -#include <asm/cpufeature.h> - -static void ssbd_ssbs_enable(struct task_struct *task) -{ - u64 val = is_compat_thread(task_thread_info(task)) ? - PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; - - task_pt_regs(task)->pstate |= val; -} - -static void ssbd_ssbs_disable(struct task_struct *task) -{ - u64 val = is_compat_thread(task_thread_info(task)) ? - PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; - - task_pt_regs(task)->pstate &= ~val; -} - -/* - * prctl interface for SSBD - */ -static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) -{ - int state = arm64_get_ssbd_state(); - - /* Unsupported */ - if (state == ARM64_SSBD_UNKNOWN) - return -ENODEV; - - /* Treat the unaffected/mitigated state separately */ - if (state == ARM64_SSBD_MITIGATED) { - switch (ctrl) { - case PR_SPEC_ENABLE: - return -EPERM; - case PR_SPEC_DISABLE: - case PR_SPEC_FORCE_DISABLE: - return 0; - } - } - - /* - * Things are a bit backward here: the arm64 internal API - * *enables the mitigation* when the userspace API *disables - * speculation*. So much fun. - */ - switch (ctrl) { - case PR_SPEC_ENABLE: - /* If speculation is force disabled, enable is not allowed */ - if (state == ARM64_SSBD_FORCE_ENABLE || - task_spec_ssb_force_disable(task)) - return -EPERM; - task_clear_spec_ssb_disable(task); - clear_tsk_thread_flag(task, TIF_SSBD); - ssbd_ssbs_enable(task); - break; - case PR_SPEC_DISABLE: - if (state == ARM64_SSBD_FORCE_DISABLE) - return -EPERM; - task_set_spec_ssb_disable(task); - set_tsk_thread_flag(task, TIF_SSBD); - ssbd_ssbs_disable(task); - break; - case PR_SPEC_FORCE_DISABLE: - if (state == ARM64_SSBD_FORCE_DISABLE) - return -EPERM; - task_set_spec_ssb_disable(task); - task_set_spec_ssb_force_disable(task); - set_tsk_thread_flag(task, TIF_SSBD); - ssbd_ssbs_disable(task); - break; - default: - return -ERANGE; - } - - return 0; -} - -int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, - unsigned long ctrl) -{ - switch (which) { - case PR_SPEC_STORE_BYPASS: - return ssbd_prctl_set(task, ctrl); - default: - return -ENODEV; - } -} - -static int ssbd_prctl_get(struct task_struct *task) -{ - switch (arm64_get_ssbd_state()) { - case ARM64_SSBD_UNKNOWN: - return -ENODEV; - case ARM64_SSBD_FORCE_ENABLE: - return PR_SPEC_DISABLE; - case ARM64_SSBD_KERNEL: - if (task_spec_ssb_force_disable(task)) - return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; - if (task_spec_ssb_disable(task)) - return PR_SPEC_PRCTL | PR_SPEC_DISABLE; - return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - case ARM64_SSBD_FORCE_DISABLE: - return PR_SPEC_ENABLE; - default: - return PR_SPEC_NOT_AFFECTED; - } -} - -int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) -{ - switch (which) { - case PR_SPEC_STORE_BYPASS: - return ssbd_prctl_get(task); - default: - return -ENODEV; - } -} diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 2dd8e3b8b94b..fa56af1a59c3 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -118,12 +118,12 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) NOKPROBE_SYMBOL(unwind_frame); void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, - int (*fn)(struct stackframe *, void *), void *data) + bool (*fn)(void *, unsigned long), void *data) { while (1) { int ret; - if (fn(frame, data)) + if (!fn(data, frame->pc)) break; ret = unwind_frame(tsk, frame); if (ret < 0) @@ -132,84 +132,89 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, } NOKPROBE_SYMBOL(walk_stackframe); -#ifdef CONFIG_STACKTRACE -struct stack_trace_data { - struct stack_trace *trace; - unsigned int no_sched_functions; - unsigned int skip; -}; - -static int save_trace(struct stackframe *frame, void *d) +static void dump_backtrace_entry(unsigned long where, const char *loglvl) { - struct stack_trace_data *data = d; - struct stack_trace *trace = data->trace; - unsigned long addr = frame->pc; - - if (data->no_sched_functions && in_sched_functions(addr)) - return 0; - if (data->skip) { - data->skip--; - return 0; - } - - trace->entries[trace->nr_entries++] = addr; - - return trace->nr_entries >= trace->max_entries; + printk("%s %pS\n", loglvl, (void *)where); } -void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, + const char *loglvl) { - struct stack_trace_data data; struct stackframe frame; + int skip = 0; - data.trace = trace; - data.skip = trace->skip; - data.no_sched_functions = 0; + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); - start_backtrace(&frame, regs->regs[29], regs->pc); - walk_stackframe(current, &frame, save_trace, &data); -} -EXPORT_SYMBOL_GPL(save_stack_trace_regs); + if (regs) { + if (user_mode(regs)) + return; + skip = 1; + } -static noinline void __save_stack_trace(struct task_struct *tsk, - struct stack_trace *trace, unsigned int nosched) -{ - struct stack_trace_data data; - struct stackframe frame; + if (!tsk) + tsk = current; if (!try_get_task_stack(tsk)) return; - data.trace = trace; - data.skip = trace->skip; - data.no_sched_functions = nosched; - - if (tsk != current) { - start_backtrace(&frame, thread_saved_fp(tsk), - thread_saved_pc(tsk)); - } else { - /* We don't want this function nor the caller */ - data.skip += 2; + if (tsk == current) { start_backtrace(&frame, (unsigned long)__builtin_frame_address(0), - (unsigned long)__save_stack_trace); + (unsigned long)dump_backtrace); + } else { + /* + * task blocked in __switch_to + */ + start_backtrace(&frame, + thread_saved_fp(tsk), + thread_saved_pc(tsk)); } - walk_stackframe(tsk, &frame, save_trace, &data); + printk("%sCall trace:\n", loglvl); + do { + /* skip until specified stack frame */ + if (!skip) { + dump_backtrace_entry(frame.pc, loglvl); + } else if (frame.fp == regs->regs[29]) { + skip = 0; + /* + * Mostly, this is the case where this function is + * called in panic/abort. As exception handler's + * stack frame does not contain the corresponding pc + * at which an exception has taken place, use regs->pc + * instead. + */ + dump_backtrace_entry(regs->pc, loglvl); + } + } while (!unwind_frame(tsk, &frame)); put_task_stack(tsk); } -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) { - __save_stack_trace(tsk, trace, 1); + dump_backtrace(NULL, tsk, loglvl); + barrier(); } -EXPORT_SYMBOL_GPL(save_stack_trace_tsk); -void save_stack_trace(struct stack_trace *trace) +#ifdef CONFIG_STACKTRACE + +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { - __save_stack_trace(current, trace, 0); + struct stackframe frame; + + if (regs) + start_backtrace(&frame, regs->regs[29], regs->pc); + else if (task == current) + start_backtrace(&frame, + (unsigned long)__builtin_frame_address(0), + (unsigned long)arch_stack_walk); + else + start_backtrace(&frame, thread_saved_fp(task), + thread_saved_pc(task)); + + walk_stackframe(task, &frame, consume_entry, cookie); } -EXPORT_SYMBOL_GPL(save_stack_trace); #endif diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index c1dee9066ff9..96cd347c7a46 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -10,6 +10,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/exec.h> +#include <asm/mte.h> #include <asm/memory.h> #include <asm/mmu_context.h> #include <asm/smp_plat.h> @@ -72,8 +73,10 @@ void notrace __cpu_suspend_exit(void) * have turned the mitigation on. If the user has forcefully * disabled it, make sure their wishes are obeyed. */ - if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) - arm64_set_ssbd_mitigation(false); + spectre_v4_enable_mitigation(NULL); + + /* Restore additional MTE-specific configuration */ + mte_suspend_exit(); } /* diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 5f0c04863d2c..e4c0dadf0d92 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -123,6 +123,16 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, local_daif_restore(DAIF_PROCCTX); user_exit(); + if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { + /* + * Process the asynchronous tag check fault before the actual + * syscall. do_notify_resume() will send a signal to userspace + * before the syscall is restarted. + */ + regs->regs[0] = -ERESTARTNOINTR; + return; + } + if (has_syscall_work(flags)) { /* * The de-facto standard way to skip a system call using ptrace diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 0801a0f3c156..ff1dd1dbfe64 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -36,21 +36,23 @@ void store_cpu_topology(unsigned int cpuid) if (mpidr & MPIDR_UP_BITMASK) return; - /* Create cpu topology mapping based on MPIDR. */ - if (mpidr & MPIDR_MT_BITMASK) { - /* Multiprocessor system : Multi-threads per core */ - cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8; - } else { - /* Multiprocessor system : Single-thread per core */ - cpuid_topo->thread_id = -1; - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | - MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16; - } + /* + * This would be the place to create cpu topology based on MPIDR. + * + * However, it cannot be trusted to depict the actual topology; some + * pieces of the architecture enforce an artificial cap on Aff0 values + * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an + * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up + * having absolutely no relationship to the actual underlying system + * topology, and cannot be reasonably used as core / package ID. + * + * If the MT bit is set, Aff0 *could* be used to define a thread ID, but + * we still wouldn't be able to obtain a sane core ID. This means we + * need to entirely ignore MPIDR for any topology deduction. + */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = cpuid; + cpuid_topo->package_id = cpu_to_node(cpuid); pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", cpuid, cpuid_topo->package_id, cpuid_topo->core_id, diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 13ebd5ca2070..8af4e0e85736 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -34,6 +34,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/esr.h> +#include <asm/extable.h> #include <asm/insn.h> #include <asm/kprobes.h> #include <asm/traps.h> @@ -53,11 +54,6 @@ static const char *handler[]= { int show_unhandled_signals = 0; -static void dump_backtrace_entry(unsigned long where, const char *loglvl) -{ - printk("%s %pS\n", loglvl, (void *)where); -} - static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); @@ -83,66 +79,6 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) printk("%sCode: %s\n", lvl, str); } -void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, - const char *loglvl) -{ - struct stackframe frame; - int skip = 0; - - pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); - - if (regs) { - if (user_mode(regs)) - return; - skip = 1; - } - - if (!tsk) - tsk = current; - - if (!try_get_task_stack(tsk)) - return; - - if (tsk == current) { - start_backtrace(&frame, - (unsigned long)__builtin_frame_address(0), - (unsigned long)dump_backtrace); - } else { - /* - * task blocked in __switch_to - */ - start_backtrace(&frame, - thread_saved_fp(tsk), - thread_saved_pc(tsk)); - } - - printk("%sCall trace:\n", loglvl); - do { - /* skip until specified stack frame */ - if (!skip) { - dump_backtrace_entry(frame.pc, loglvl); - } else if (frame.fp == regs->regs[29]) { - skip = 0; - /* - * Mostly, this is the case where this function is - * called in panic/abort. As exception handler's - * stack frame does not contain the corresponding pc - * at which an exception has taken place, use regs->pc - * instead. - */ - dump_backtrace_entry(regs->pc, loglvl); - } - } while (!unwind_frame(tsk, &frame)); - - put_task_stack(tsk); -} - -void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) -{ - dump_backtrace(NULL, tsk, loglvl); - barrier(); -} - #ifdef CONFIG_PREEMPT #define S_PREEMPT " PREEMPT" #elif defined(CONFIG_PREEMPT_RT) @@ -200,9 +136,9 @@ void die(const char *str, struct pt_regs *regs, int err) oops_exit(); if (in_interrupt()) - panic("Fatal exception in interrupt"); + panic("%s: Fatal exception in interrupt", str); if (panic_on_oops) - panic("Fatal exception"); + panic("%s: Fatal exception", str); raw_spin_unlock_irqrestore(&die_lock, flags); @@ -412,7 +348,7 @@ exit: return fn ? fn(regs, instr) : 1; } -void force_signal_inject(int signal, int code, unsigned long address) +void force_signal_inject(int signal, int code, unsigned long address, unsigned int err) { const char *desc; struct pt_regs *regs = current_pt_regs(); @@ -438,7 +374,7 @@ void force_signal_inject(int signal, int code, unsigned long address) signal = SIGKILL; } - arm64_notify_die(desc, regs, signal, code, (void __user *)address, 0); + arm64_notify_die(desc, regs, signal, code, (void __user *)address, err); } /* @@ -455,7 +391,7 @@ void arm64_notify_segfault(unsigned long addr) code = SEGV_ACCERR; mmap_read_unlock(current->mm); - force_signal_inject(SIGSEGV, code, addr); + force_signal_inject(SIGSEGV, code, addr, 0); } void do_undefinstr(struct pt_regs *regs) @@ -468,17 +404,28 @@ void do_undefinstr(struct pt_regs *regs) return; BUG_ON(!user_mode(regs)); - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } NOKPROBE_SYMBOL(do_undefinstr); void do_bti(struct pt_regs *regs) { BUG_ON(!user_mode(regs)); - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } NOKPROBE_SYMBOL(do_bti); +void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr) +{ + /* + * Unexpected FPAC exception or pointer authentication failure in + * the kernel: kill the task before it does any more harm. + */ + BUG_ON(!user_mode(regs)); + force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr); +} +NOKPROBE_SYMBOL(do_ptrauth_fault); + #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ res = -EFAULT; \ @@ -528,7 +475,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) __user_cache_maint("ic ivau", address, ret); break; default: - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); return; } @@ -581,7 +528,7 @@ static void mrs_handler(unsigned int esr, struct pt_regs *regs) sysreg = esr_sys64_to_sysreg(esr); if (do_emulate_mrs(regs, sysreg, rt) != 0) - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } static void wfi_handler(unsigned int esr, struct pt_regs *regs) @@ -775,6 +722,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_SYS64] = "MSR/MRS (AArch64)", [ESR_ELx_EC_SVE] = "SVE", [ESR_ELx_EC_ERET] = "ERET/ERETAA/ERETAB", + [ESR_ELx_EC_FPAC] = "FPAC", [ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF", [ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)", [ESR_ELx_EC_IABT_CUR] = "IABT (current EL)", @@ -935,26 +883,6 @@ asmlinkage void enter_from_user_mode(void) } NOKPROBE_SYMBOL(enter_from_user_mode); -void __pte_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pte %016lx.\n", file, line, val); -} - -void __pmd_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pmd %016lx.\n", file, line, val); -} - -void __pud_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pud %016lx.\n", file, line, val); -} - -void __pgd_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pgd %016lx.\n", file, line, val); -} - /* GENERIC_BUG traps */ int is_valid_bugaddr(unsigned long addr) @@ -994,6 +922,21 @@ static struct break_hook bug_break_hook = { .imm = BUG_BRK_IMM, }; +static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr) +{ + pr_err("%s generated an invalid instruction at %pS!\n", + in_bpf_jit(regs) ? "BPF JIT" : "Kernel text patching", + (void *)instruction_pointer(regs)); + + /* We cannot handle this */ + return DBG_HOOK_ERROR; +} + +static struct break_hook fault_break_hook = { + .fn = reserved_fault_handler, + .imm = FAULT_BRK_IMM, +}; + #ifdef CONFIG_KASAN_SW_TAGS #define KASAN_ESR_RECOVER 0x20 @@ -1059,6 +1002,7 @@ int __init early_brk64(unsigned long addr, unsigned int esr, void __init trap_init(void) { register_kernel_break_hook(&bug_break_hook); + register_kernel_break_hook(&fault_break_hook); #ifdef CONFIG_KASAN_SW_TAGS register_kernel_break_hook(&kasan_break_hook); #endif diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index d4202a32abc9..debb8995d57f 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -30,15 +30,11 @@ #include <asm/vdso.h> extern char vdso_start[], vdso_end[]; -#ifdef CONFIG_COMPAT_VDSO extern char vdso32_start[], vdso32_end[]; -#endif /* CONFIG_COMPAT_VDSO */ enum vdso_abi { VDSO_ABI_AA64, -#ifdef CONFIG_COMPAT_VDSO VDSO_ABI_AA32, -#endif /* CONFIG_COMPAT_VDSO */ }; enum vvar_pages { @@ -284,21 +280,17 @@ up_fail: /* * Create and map the vectors page for AArch32 tasks. */ -#ifdef CONFIG_COMPAT_VDSO static int aarch32_vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { return __vdso_remap(VDSO_ABI_AA32, sm, new_vma); } -#endif /* CONFIG_COMPAT_VDSO */ enum aarch32_map { AA32_MAP_VECTORS, /* kuser helpers */ -#ifdef CONFIG_COMPAT_VDSO + AA32_MAP_SIGPAGE, AA32_MAP_VVAR, AA32_MAP_VDSO, -#endif - AA32_MAP_SIGPAGE }; static struct page *aarch32_vectors_page __ro_after_init; @@ -309,7 +301,10 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { .name = "[vectors]", /* ABI */ .pages = &aarch32_vectors_page, }, -#ifdef CONFIG_COMPAT_VDSO + [AA32_MAP_SIGPAGE] = { + .name = "[sigpage]", /* ABI */ + .pages = &aarch32_sig_page, + }, [AA32_MAP_VVAR] = { .name = "[vvar]", .fault = vvar_fault, @@ -319,11 +314,6 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { .name = "[vdso]", .mremap = aarch32_vdso_mremap, }, -#endif /* CONFIG_COMPAT_VDSO */ - [AA32_MAP_SIGPAGE] = { - .name = "[sigpage]", /* ABI */ - .pages = &aarch32_sig_page, - }, }; static int aarch32_alloc_kuser_vdso_page(void) @@ -362,25 +352,25 @@ static int aarch32_alloc_sigpage(void) return 0; } -#ifdef CONFIG_COMPAT_VDSO static int __aarch32_alloc_vdso_pages(void) { + + if (!IS_ENABLED(CONFIG_COMPAT_VDSO)) + return 0; + vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR]; vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO]; return __vdso_init(VDSO_ABI_AA32); } -#endif /* CONFIG_COMPAT_VDSO */ static int __init aarch32_alloc_vdso_pages(void) { int ret; -#ifdef CONFIG_COMPAT_VDSO ret = __aarch32_alloc_vdso_pages(); if (ret) return ret; -#endif ret = aarch32_alloc_sigpage(); if (ret) @@ -449,14 +439,12 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (ret) goto out; -#ifdef CONFIG_COMPAT_VDSO - ret = __setup_additional_pages(VDSO_ABI_AA32, - mm, - bprm, - uses_interp); - if (ret) - goto out; -#endif /* CONFIG_COMPAT_VDSO */ + if (IS_ENABLED(CONFIG_COMPAT_VDSO)) { + ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm, + uses_interp); + if (ret) + goto out; + } ret = aarch32_sigreturn_setup(mm); out: @@ -497,8 +485,7 @@ static int __init vdso_init(void) } arch_initcall(vdso_init); -int arch_setup_additional_pages(struct linux_binprm *bprm, - int uses_interp) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; int ret; @@ -506,11 +493,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, if (mmap_write_lock_killable(mm)) return -EINTR; - ret = __setup_additional_pages(VDSO_ABI_AA64, - mm, - bprm, - uses_interp); - + ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp); mmap_write_unlock(mm); return ret; diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 5139a5f19256..d6adb4677c25 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -208,7 +208,7 @@ quiet_cmd_vdsosym = VDSOSYM $@ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ # Install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ +quiet_cmd_vdso_install = INSTALL32 $@ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so vdso.so: $(obj)/vdso.so.dbg diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index ec8e894684a7..82801d98a2b7 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -20,6 +20,13 @@ ENTRY(_text) jiffies = jiffies_64; + +#define HYPERVISOR_EXTABLE \ + . = ALIGN(SZ_8); \ + __start___kvm_ex_table = .; \ + *(__kvm_ex_table) \ + __stop___kvm_ex_table = .; + #define HYPERVISOR_TEXT \ /* \ * Align to 4 KB so that \ @@ -35,6 +42,7 @@ jiffies = jiffies_64; __hyp_idmap_text_end = .; \ __hyp_text_start = .; \ *(.hyp.text) \ + HYPERVISOR_EXTABLE \ __hyp_text_end = .; #define IDMAP_TEXT \ @@ -97,7 +105,7 @@ SECTIONS *(.eh_frame) } - . = KIMAGE_VADDR + TEXT_OFFSET; + . = KIMAGE_VADDR; .head.text : { _text = .; @@ -266,4 +274,4 @@ ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, /* * If padding is applied before .head.text, virt<->phys conversions will fail. */ -ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned") +ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned") |